diff --git a/app.py b/app.py index a699bc5b3c2e987102ca93e0ee28d601e0a93d02..6bb3ad1ae35f8695704c994363e12ee72da99298 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,62 @@ import gradio as gr +import cv2 +import numpy as np +from PIL import Image +import base64 +from io import BytesIO +from models.image_text_transformation import ImageTextTransformation -def greet(name): - return "Hello " + name + "!!" +def pil_image_to_base64(image): + buffered = BytesIO() + image.save(buffered, format="JPEG") + img_str = base64.b64encode(buffered.getvalue()).decode() + return img_str -iface = gr.Interface(fn=greet, inputs="text", outputs="text") -iface.launch() \ No newline at end of file +def add_logo(): + with open("examples/logo.png", "rb") as f: + logo_base64 = base64.b64encode(f.read()).decode() + return logo_base64 + +def process_image(image_src, processor): + gen_text = processor.image_to_text(image_src) + gen_image = processor.text_to_image(gen_text) + gen_image_str = pil_image_to_base64(gen_image) + # Combine the outputs into a single HTML output + custom_output = f''' +
{gen_text}
+{gen_text}
+{gen_text}
+{gen_text}
+CUDA | torch 1.10 | torch 1.9 | torch 1.8 |
---|---|---|---|
11.3 | install | ||
11.1 | install | install | install |
10.2 | install | install | install |
10.1 | install | ||
cpu | install | install | install |
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|
R50-C4 | +1x | +0.551 | +0.102 | +4.8 | +35.7 | +137257644 | +model | metrics | +
R50-DC5 | +1x | +0.380 | +0.068 | +5.0 | +37.3 | +137847829 | +model | metrics | +
R50-FPN | +1x | +0.210 | +0.038 | +3.0 | +37.9 | +137257794 | +model | metrics | +
R50-C4 | +3x | +0.543 | +0.104 | +4.8 | +38.4 | +137849393 | +model | metrics | +
R50-DC5 | +3x | +0.378 | +0.070 | +5.0 | +39.0 | +137849425 | +model | metrics | +
R50-FPN | +3x | +0.209 | +0.038 | +3.0 | +40.2 | +137849458 | +model | metrics | +
R101-C4 | +3x | +0.619 | +0.139 | +5.9 | +41.1 | +138204752 | +model | metrics | +
R101-DC5 | +3x | +0.452 | +0.086 | +6.1 | +40.6 | +138204841 | +model | metrics | +
R101-FPN | +3x | +0.286 | +0.051 | +4.1 | +42.0 | +137851257 | +model | metrics | +
X101-FPN | +3x | +0.638 | +0.098 | +6.7 | +43.0 | +139173657 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|
R50 | +1x | +0.205 | +0.041 | +4.1 | +37.4 | +190397773 | +model | metrics | +
R50 | +3x | +0.205 | +0.041 | +4.1 | +38.7 | +190397829 | +model | metrics | +
R101 | +3x | +0.291 | +0.054 | +5.2 | +40.4 | +190397697 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+prop. AR |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
RPN R50-C4 | +1x | +0.130 | +0.034 | +1.5 | ++ | 51.6 | +137258005 | +model | metrics | +
RPN R50-FPN | +1x | +0.186 | +0.032 | +2.7 | ++ | 58.0 | +137258492 | +model | metrics | +
Fast R-CNN R50-FPN | +1x | +0.140 | +0.029 | +2.6 | +37.8 | ++ | 137635226 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
R50-C4 | +1x | +0.584 | +0.110 | +5.2 | +36.8 | +32.2 | +137259246 | +model | metrics | +
R50-DC5 | +1x | +0.471 | +0.076 | +6.5 | +38.3 | +34.2 | +137260150 | +model | metrics | +
R50-FPN | +1x | +0.261 | +0.043 | +3.4 | +38.6 | +35.2 | +137260431 | +model | metrics | +
R50-C4 | +3x | +0.575 | +0.111 | +5.2 | +39.8 | +34.4 | +137849525 | +model | metrics | +
R50-DC5 | +3x | +0.470 | +0.076 | +6.5 | +40.0 | +35.9 | +137849551 | +model | metrics | +
R50-FPN | +3x | +0.261 | +0.043 | +3.4 | +41.0 | +37.2 | +137849600 | +model | metrics | +
R101-C4 | +3x | +0.652 | +0.145 | +6.3 | +42.6 | +36.7 | +138363239 | +model | metrics | +
R101-DC5 | +3x | +0.545 | +0.092 | +7.6 | +41.9 | +37.3 | +138363294 | +model | metrics | +
R101-FPN | +3x | +0.340 | +0.056 | +4.6 | +42.9 | +38.6 | +138205316 | +model | metrics | +
X101-FPN | +3x | +0.690 | +0.103 | +7.2 | +44.3 | +39.5 | +139653917 | +model | metrics | +
Name | +epochs | +train time (s/im) |
+inference time (s/im) |
+box AP |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|
R50-FPN | +100 | +0.376 | +0.069 | +44.6 | +40.3 | +42047764 | +model | metrics | +
R50-FPN | +200 | +0.376 | +0.069 | +46.3 | +41.7 | +42047638 | +model | metrics | +
R50-FPN | +400 | +0.376 | +0.069 | +47.4 | +42.5 | +42019571 | +model | metrics | +
R101-FPN | +100 | +0.518 | +0.073 | +46.4 | +41.6 | +42025812 | +model | metrics | +
R101-FPN | +200 | +0.518 | +0.073 | +48.0 | +43.1 | +42131867 | +model | metrics | +
R101-FPN | +400 | +0.518 | +0.073 | +48.9 | +43.7 | +42073830 | +model | metrics | +
regnetx_4gf_dds_FPN | +100 | +0.474 | +0.071 | +46.0 | +41.3 | +42047771 | +model | metrics | +
regnetx_4gf_dds_FPN | +200 | +0.474 | +0.071 | +48.1 | +43.1 | +42132721 | +model | metrics | +
regnetx_4gf_dds_FPN | +400 | +0.474 | +0.071 | +48.6 | +43.5 | +42025447 | +model | metrics | +
regnety_4gf_dds_FPN | +100 | +0.487 | +0.073 | +46.1 | +41.6 | +42047784 | +model | metrics | +
regnety_4gf_dds_FPN | +200 | +0.487 | +0.072 | +47.8 | +43.0 | +42047642 | +model | metrics | +
regnety_4gf_dds_FPN | +400 | +0.487 | +0.072 | +48.2 | +43.3 | +42045954 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+kp. AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
R50-FPN | +1x | +0.315 | +0.072 | +5.0 | +53.6 | +64.0 | +137261548 | +model | metrics | +
R50-FPN | +3x | +0.316 | +0.066 | +5.0 | +55.4 | +65.5 | +137849621 | +model | metrics | +
R101-FPN | +3x | +0.390 | +0.076 | +6.1 | +56.4 | +66.1 | +138363331 | +model | metrics | +
X101-FPN | +3x | +0.738 | +0.121 | +8.7 | +57.3 | +66.0 | +139686956 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+PQ | +model id | +download | + + +
---|---|---|---|---|---|---|---|---|---|
R50-FPN | +1x | +0.304 | +0.053 | +4.8 | +37.6 | +34.7 | +39.4 | +139514544 | +model | metrics | +
R50-FPN | +3x | +0.302 | +0.053 | +4.8 | +40.0 | +36.5 | +41.5 | +139514569 | +model | metrics | +
R101-FPN | +3x | +0.392 | +0.066 | +6.0 | +42.4 | +38.5 | +43.0 | +139514519 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
R50-FPN | +1x | +0.292 | +0.107 | +7.1 | +23.6 | +24.4 | +144219072 | +model | metrics | +
R101-FPN | +1x | +0.371 | +0.114 | +7.8 | +25.6 | +25.9 | +144219035 | +model | metrics | +
X101-FPN | +1x | +0.712 | +0.151 | +10.2 | +26.7 | +27.1 | +144219108 | +model | metrics | +
Name | +train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+box AP50 |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
R50-FPN, Cityscapes | +0.240 | +0.078 | +4.4 | ++ | + | 36.5 | +142423278 | +model | metrics | +
R50-C4, VOC | +0.537 | +0.081 | +4.8 | +51.9 | +80.3 | ++ | 142202221 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
Baseline R50-FPN | +1x | +0.261 | +0.043 | +3.4 | +38.6 | +35.2 | +137260431 | +model | metrics | +
Deformable Conv | +1x | +0.342 | +0.048 | +3.5 | +41.5 | +37.5 | +138602867 | +model | metrics | +
Cascade R-CNN | +1x | +0.317 | +0.052 | +4.0 | +42.1 | +36.4 | +138602847 | +model | metrics | +
Baseline R50-FPN | +3x | +0.261 | +0.043 | +3.4 | +41.0 | +37.2 | +137849600 | +model | metrics | +
Deformable Conv | +3x | +0.349 | +0.047 | +3.5 | +42.7 | +38.5 | +144998336 | +model | metrics | +
Cascade R-CNN | +3x | +0.328 | +0.053 | +4.0 | +44.3 | +38.5 | +144998488 | +model | metrics | +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|
Baseline R50-FPN | +3x | +0.261 | +0.043 | +3.4 | +41.0 | +37.2 | +137849600 | +model | metrics | +
GN | +3x | +0.309 | +0.060 | +5.6 | +42.6 | +38.6 | +138602888 | +model | metrics | +
SyncBN | +3x | +0.345 | +0.053 | +5.5 | +41.9 | +37.8 | +169527823 | +model | metrics | +
GN (from scratch) | +3x | +0.338 | +0.061 | +7.2 | +39.9 | +36.6 | +138602908 | +model | metrics | +
GN (from scratch) | +9x | +N/A | +0.061 | +7.2 | +43.7 | +39.6 | +183808979 | +model | metrics | +
SyncBN (from scratch) | +9x | +N/A | +0.055 | +7.2 | +43.6 | +39.3 | +184226666 | +model | metrics | +
Name | +inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+PQ | +model id | +download | + + +
---|---|---|---|---|---|---|---|
Panoptic FPN R101 | +0.098 | +11.4 | +47.4 | +41.3 | +46.1 | +139797668 | +model | metrics | +
Mask R-CNN X152 | +0.234 | +15.1 | +50.2 | +44.0 | ++ | 18131413 | +model | metrics | +
above + test-time aug. | ++ | + | 51.9 | +45.9 | ++ | + | + |
+ +> [**Probabilistic two-stage detection**](http://arxiv.org/abs/2103.07461), +> Xingyi Zhou, Vladlen Koltun, Philipp Krähenbühl, +> *arXiv technical report ([arXiv 2103.07461](http://arxiv.org/abs/2103.07461))* + +Contact: [zhouxy@cs.utexas.edu](mailto:zhouxy@cs.utexas.edu). Any questions or discussions are welcomed! + +## Abstract + +We develop a probabilistic interpretation of two-stage object detection. We show that this probabilistic interpretation motivates a number of common empirical training practices. It also suggests changes to two-stage detection pipelines. Specifically, the first stage should infer proper object-vs-background likelihoods, which should then inform the overall score of the detector. A standard region proposal network (RPN) cannot infer this likelihood sufficiently well, but many one-stage detectors can. We show how to build a probabilistic two-stage detector from any state-of-the-art one-stage detector. The resulting detectors are faster and more accurate than both their one- and two-stage precursors. Our detector achieves 56.4 mAP on COCO test-dev with single-scale testing, outperforming all published results. Using a lightweight backbone, our detector achieves 49.2 mAP on COCO at 33 fps on a Titan Xp. + +## Summary + +- Two-stage CenterNet: First stage estimates object probabilities, second stage conditionally classifies objects. + +- Resulting detector is faster and more accurate than both traditional two-stage detectors (fewer proposals required), and one-stage detectors (lighter first stage head). + +- Our best model achieves 56.4 mAP on COCO test-dev. + +- This repo also includes a detectron2-based CenterNet implementation with better accuracy (42.5 mAP at 70FPS) and a new FPN version of CenterNet (40.2 mAP with Res50_1x). + +## Main results + +All models are trained with multi-scale training, and tested with a single scale. The FPS is tested on a Titan RTX GPU. 
+More models and details can be found in the [MODEL_ZOO](projects/CenterNet2/centernet2_docs/MODEL_ZOO.md). + +#### COCO + +| Model | COCO val mAP | FPS | +|-------------------------------------------|---------------|-------| +| CenterNet-S4_DLA_8x | 42.5 | 71 | +| CenterNet2_R50_1x | 42.9 | 24 | +| CenterNet2_X101-DCN_2x | 49.9 | 8 | +| CenterNet2_R2-101-DCN-BiFPN_4x+4x_1560_ST | 56.1 | 5 | +| CenterNet2_DLA-BiFPN-P5_24x_ST | 49.2 | 38 | + + +#### LVIS + +| Model | val mAP box | +| ------------------------- | ----------- | +| CenterNet2_R50_1x | 26.5 | +| CenterNet2_FedLoss_R50_1x | 28.3 | + + +#### Objects365 + +| Model | val mAP | +|-------------------------------------------|----------| +| CenterNet2_R50_1x | 22.6 | + +## Installation + +Our project is developed on [detectron2](https://github.com/facebookresearch/detectron2). Please follow the official detectron2 [installation](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md). All our code is under `projects/CenterNet2/`. In theory, you should be able to copy-paste `projects/CenterNet2/` to the latest detectron2 release or your own detectron2 repo to run our project. There might be API changes in future detectron2 releases that make it incompatible. + +We use the default detectron2 demo script. To run inference on an image folder using our pre-trained model, run + +~~~ +python projects/CenterNet2/demo/demo.py --config-file projects/CenterNet2/configs/CenterNet2_R50_1x.yaml --input path/to/image/ --opts MODEL.WEIGHTS models/CenterNet2_R50_1x.pth +~~~ + +## Benchmark evaluation and training + +Please check detectron2 [GETTING_STARTED.md](https://github.com/facebookresearch/detectron2/blob/master/GETTING_STARTED.md) for running evaluation and training. Our config files are under `projects/CenterNet2/configs` and the pre-trained models are in the [MODEL_ZOO](projects/CenterNet2/centernet2_docs/MODEL_ZOO.md). 
+ + +## License + +Our code under `projects/CenterNet2/` is under [Apache 2.0 license](projects/CenterNet2/LICENSE). `projects/CenterNet2/centernet/modeling/backbone/bifpn_fcos.py` is from [AdelaiDet](https://github.com/aim-uofa/AdelaiDet), which follows the original [non-commercial license](https://github.com/aim-uofa/AdelaiDet/blob/master/LICENSE). The code from detectron2 follows the original [Apache 2.0 license](LICENSE). + +## Citation + +If you find this project useful for your research, please use the following BibTeX entry. + + @inproceedings{zhou2021probablistic, + title={Probabilistic two-stage detection}, + author={Zhou, Xingyi and Koltun, Vladlen and Kr{\"a}henb{\"u}hl, Philipp}, + booktitle={arXiv preprint arXiv:2103.07461}, + year={2021} + } diff --git a/models/grit_src/third_party/CenterNet2/README_D2.md b/models/grit_src/third_party/CenterNet2/README_D2.md new file mode 100644 index 0000000000000000000000000000000000000000..a88ad7e21ce1d8651ec0d73848ce6dcd17f19d00 --- /dev/null +++ b/models/grit_src/third_party/CenterNet2/README_D2.md @@ -0,0 +1,62 @@ + + +Detectron2 is Facebook AI Research's next generation software system +that implements state-of-the-art object detection algorithms. +It is a ground-up rewrite of the previous version, +[Detectron](https://github.com/facebookresearch/Detectron/), +and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/). + +
Name | +lr sched |
+train time (s/iter) |
+inference time (s/im) |
+train mem (GB) |
+box AP |
+mask AP |
+kp. AP |
+model id | +download | + + +
---|---|---|---|---|---|---|---|---|---|
Faster R-CNN | +1x | +0.219 | +0.038 | +3.1 | +36.9 | ++ | + | 137781054 | +model | metrics | +
Keypoint R-CNN | +1x | +0.313 | +0.071 | +5.0 | +53.1 | ++ | 64.2 | +137781195 | +model | metrics | +
Mask R-CNN | +1x | +0.273 | +0.043 | +3.4 | +37.8 | +34.9 | ++ | 137781281 | +model | metrics | +