afrizalha committed
Commit 67d78ab
Parent: 1f6f60c

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; the raw diff has the full set.
Files changed (50)
  1. dwpose/.gitattributes +35 -0
  2. dwpose/README.md +3 -0
  3. dwpose/dw-ll_ucoco.pth +3 -0
  4. dwpose/dw-ll_ucoco_384.onnx +3 -0
  5. dwpose/dw-ll_ucoco_384.pth +3 -0
  6. dwpose/dw-mm_ucoco.pth +3 -0
  7. dwpose/dw-ss_ucoco.pth +3 -0
  8. dwpose/dw-tt_ucoco.pth +3 -0
  9. dwpose/rtm-l_ucoco_256-95bb32f5_20230822.pth +3 -0
  10. dwpose/rtm-x_ucoco_256-05f5bcb7_20230822.pth +3 -0
  11. dwpose/rtm-x_ucoco_384-f5b50679_20230822.pth +3 -0
  12. dwpose/yolox_l.onnx +3 -0
  13. face-parse-bisent/.gitignore +110 -0
  14. face-parse-bisent/6.jpg +0 -0
  15. face-parse-bisent/79999_iter.pth +3 -0
  16. face-parse-bisent/LICENSE +21 -0
  17. face-parse-bisent/README.md +68 -0
  18. face-parse-bisent/evaluate.py +95 -0
  19. face-parse-bisent/face_dataset.py +106 -0
  20. face-parse-bisent/hair.png +0 -0
  21. face-parse-bisent/logger.py +23 -0
  22. face-parse-bisent/loss.py +75 -0
  23. face-parse-bisent/makeup.py +130 -0
  24. face-parse-bisent/makeup/116_1.png +0 -0
  25. face-parse-bisent/makeup/116_3.png +0 -0
  26. face-parse-bisent/makeup/116_lip_ori.png +0 -0
  27. face-parse-bisent/makeup/116_ori.png +0 -0
  28. face-parse-bisent/model.py +283 -0
  29. face-parse-bisent/modules/__init__.py +5 -0
  30. face-parse-bisent/modules/bn.py +130 -0
  31. face-parse-bisent/modules/deeplab.py +84 -0
  32. face-parse-bisent/modules/dense.py +42 -0
  33. face-parse-bisent/modules/functions.py +234 -0
  34. face-parse-bisent/modules/misc.py +21 -0
  35. face-parse-bisent/modules/residual.py +88 -0
  36. face-parse-bisent/modules/src/checks.h +15 -0
  37. face-parse-bisent/modules/src/inplace_abn.cpp +95 -0
  38. face-parse-bisent/modules/src/inplace_abn.h +88 -0
  39. face-parse-bisent/modules/src/inplace_abn_cpu.cpp +119 -0
  40. face-parse-bisent/modules/src/inplace_abn_cuda.cu +333 -0
  41. face-parse-bisent/modules/src/inplace_abn_cuda_half.cu +275 -0
  42. face-parse-bisent/modules/src/utils/checks.h +15 -0
  43. face-parse-bisent/modules/src/utils/common.h +49 -0
  44. face-parse-bisent/modules/src/utils/cuda.cuh +71 -0
  45. face-parse-bisent/optimizer.py +69 -0
  46. face-parse-bisent/prepropess_data.py +38 -0
  47. face-parse-bisent/resnet.py +109 -0
  48. face-parse-bisent/resnet18-5c106cde.pth +3 -0
  49. face-parse-bisent/test.py +90 -0
  50. face-parse-bisent/train.py +179 -0
dwpose/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
dwpose/README.md ADDED
@@ -0,0 +1,3 @@
+ ---
+ license: apache-2.0
+ ---
dwpose/dw-ll_ucoco.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9600664e7927229ed594197d552023e3be213f810beb38847a959ec8261e0f7
+ size 404734742
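Each checkpoint entry in this commit is a Git LFS pointer file rather than the weights themselves: the payload is addressed by the `oid sha256:` digest and `size` recorded above. A minimal, stdlib-only sketch of how one could verify a separately downloaded payload against its pointer — the local paths are illustrative, not part of this commit:

```python
import hashlib

def verify_lfs_pointer(pointer_path, payload_path):
    """Check a downloaded payload against its Git LFS pointer (sketch)."""
    fields = dict(line.split(' ', 1) for line in open(pointer_path) if ' ' in line)
    expected_oid = fields['oid'].strip().split(':', 1)[1]   # after "sha256:"
    expected_size = int(fields['size'])

    h = hashlib.sha256()
    size = 0
    with open(payload_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Illustrative paths — assumes the weights were fetched separately.
print(verify_lfs_pointer('dwpose/dw-ll_ucoco.pth', '/tmp/dw-ll_ucoco.pth'))
```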
dwpose/dw-ll_ucoco_384.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:724f4ff2439ed61afb86fb8a1951ec39c6220682803b4a8bd4f598cd913b1843
+ size 134399116
dwpose/dw-ll_ucoco_384.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d9408b13cd863c4e95a149dd31232f88f2a12aa6cf8964ed74d7d97748c7a07
+ size 406878486
dwpose/dw-mm_ucoco.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b24f27f57d18d8bb7abc3af8e09bcc5f77ee9ecae13439f70a8f7d1b885413cf
+ size 216812378
dwpose/dw-ss_ucoco.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c13dfb1dc63aac2d794ac130bb89734330b3c74a1aff921a40fcde1d87361ffc
+ size 102933707
dwpose/dw-tt_ucoco.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7097f5af7f100609acffe58eb01734f02ffbfe22794fe029c2ea0a4d68d0f42d
+ size 68475107
dwpose/rtm-l_ucoco_256-95bb32f5_20230822.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95bb32f5c6ef235a01e5787b33040d5330f7d315afbbefb66832cabe83b6e49b
+ size 134223626
dwpose/rtm-x_ucoco_256-05f5bcb7_20230822.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05f5bcb76599e0e23389a9c21f3390b2aa1a56363b27d844556c3be4b138c536
+ size 226726579
dwpose/rtm-x_ucoco_384-f5b50679_20230822.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5b506794e8e4facfa6ae0bf2a19c7c43d67836a90b69a19beced4ddb54732b4
+ size 227246772
dwpose/yolox_l.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7860ae79de6c89a3c1eb72ae9a2756c0ccfbe04b7791bb5880afabd97855a411
+ size 216746733
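The two ONNX files here (`yolox_l.onnx` for person detection, `dw-ll_ucoco_384.onnx` for pose estimation) can be inspected without any training stack. A hedged sketch, assuming `onnxruntime` is installed and the paths are relative to the uploaded folder; it only opens each session and reports its I/O signature:

```python
import onnxruntime as ort

for path in ('dwpose/yolox_l.onnx', 'dwpose/dw-ll_ucoco_384.onnx'):
    sess = ort.InferenceSession(path, providers=['CPUExecutionProvider'])
    for inp in sess.get_inputs():
        print(path, 'input: ', inp.name, inp.shape, inp.type)
    for out in sess.get_outputs():
        print(path, 'output:', out.name, out.shape, out.type)
```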
face-parse-bisent/.gitignore ADDED
@@ -0,0 +1,110 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ # res
+ res/
+
+ .idea/
face-parse-bisent/6.jpg ADDED
face-parse-bisent/79999_iter.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:468e13ca13a9b43cc0881a9f99083a430e9c0a38abd935431d1c28ee94b26567
+ size 53289463
face-parse-bisent/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2019 zll
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
face-parse-bisent/README.md ADDED
@@ -0,0 +1,68 @@
+ # face-parsing.PyTorch
+
+ <p align="center">
+ <a href="https://github.com/zllrunning/face-parsing.PyTorch">
+ <img class="page-image" src="https://github.com/zllrunning/face-parsing.PyTorch/blob/master/6.jpg" >
+ </a>
+ </p>
+
+ ### Contents
+ - [Training](#training)
+ - [Demo](#Demo)
+ - [References](#references)
+
+ ## Training
+
+ 1. Prepare training data:
+ -- download the [CelebAMask-HQ dataset](https://github.com/switchablenorms/CelebAMask-HQ)
+
+ -- change the file paths in `prepropess_data.py` and run
+ ```Shell
+ python prepropess_data.py
+ ```
+
+ 2. Train the model on the CelebAMask-HQ dataset:
+ Just run the train script:
+ ```
+ $ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 train.py
+ ```
+
+ If you do not wish to train the model, you can download [our pre-trained model](https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812) and save it in `res/cp`.
+
+ ## Demo
+ 1. Evaluate the trained model using:
+ ```Shell
+ # evaluate using GPU
+ python test.py
+ ```
+
+ ## Face makeup using parsing maps
+ [**face-makeup.PyTorch**](https://github.com/zllrunning/face-makeup.PyTorch)
+ <table>
+
+ <tr>
+ <th>&nbsp;</th>
+ <th>Hair</th>
+ <th>Lip</th>
+ </tr>
+
+ <!-- Line 1: Original Input -->
+ <tr>
+ <td><em>Original Input</em></td>
+ <td><img src="makeup/116_ori.png" height="256" width="256" alt="Original Input"></td>
+ <td><img src="makeup/116_lip_ori.png" height="256" width="256" alt="Original Input"></td>
+ </tr>
+
+ <!-- Line 3: Color -->
+ <tr>
+ <td>Color</td>
+ <td><img src="makeup/116_1.png" height="256" width="256" alt="Color"></td>
+ <td><img src="makeup/116_3.png" height="256" width="256" alt="Color"></td>
+ </tr>
+
+ </table>
+
+ ## References
+ - [BiSeNet](https://github.com/CoinCheung/BiSeNet)
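For a single image, the essentials of the Demo step above reduce to what `evaluate.py` (next file) does in a loop. A condensed sketch, assuming the pre-trained checkpoint is available locally as `79999_iter.pth` (it is shipped in this commit as an LFS pointer) and run from the `face-parse-bisent` folder:

```python
import torch
import torchvision.transforms as transforms
from PIL import Image
from model import BiSeNet

net = BiSeNet(n_classes=19)
net.load_state_dict(torch.load('79999_iter.pth', map_location='cpu'))
net.eval()

to_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
img = Image.open('6.jpg').resize((512, 512), Image.BILINEAR)
with torch.no_grad():
    out = net(to_tensor(img).unsqueeze(0))[0]  # first of the three outputs
parsing = out.squeeze(0).argmax(0).numpy()     # 512x512 label map; 17 == hair
```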
face-parse-bisent/evaluate.py ADDED
@@ -0,0 +1,95 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ from logger import setup_logger
+ from model import BiSeNet
+ from face_dataset import FaceMask
+
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import DataLoader
+ import torch.nn.functional as F
+ import torch.distributed as dist
+
+ import os
+ import os.path as osp
+ import logging
+ import time
+ import numpy as np
+ from tqdm import tqdm
+ import math
+ from PIL import Image
+ import torchvision.transforms as transforms
+ import cv2
+
+ def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
+     # Colors for all 20 parts
+     part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
+                    [255, 0, 85], [255, 0, 170],
+                    [0, 255, 0], [85, 255, 0], [170, 255, 0],
+                    [0, 255, 85], [0, 255, 170],
+                    [0, 0, 255], [85, 0, 255], [170, 0, 255],
+                    [0, 85, 255], [0, 170, 255],
+                    [255, 255, 0], [255, 255, 85], [255, 255, 170],
+                    [255, 0, 255], [255, 85, 255], [255, 170, 255],
+                    [0, 255, 255], [85, 255, 255], [170, 255, 255]]
+
+     im = np.array(im)
+     vis_im = im.copy().astype(np.uint8)
+     vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
+     vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
+     vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255
+
+     num_of_class = np.max(vis_parsing_anno)
+
+     for pi in range(1, num_of_class + 1):
+         index = np.where(vis_parsing_anno == pi)
+         vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]
+
+     vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
+     # print(vis_parsing_anno_color.shape, vis_im.shape)
+     vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)
+
+     # Save result or not
+     if save_im:
+         cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
+
+     # return vis_im
+
+ def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):
+
+     if not os.path.exists(respth):
+         os.makedirs(respth)
+
+     n_classes = 19
+     net = BiSeNet(n_classes=n_classes)
+     net.cuda()
+     save_pth = osp.join('res/cp', cp)
+     net.load_state_dict(torch.load(save_pth))
+     net.eval()
+
+     to_tensor = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+     ])
+     with torch.no_grad():
+         for image_path in os.listdir(dspth):
+             img = Image.open(osp.join(dspth, image_path))
+             image = img.resize((512, 512), Image.BILINEAR)
+             img = to_tensor(image)
+             img = torch.unsqueeze(img, 0)
+             img = img.cuda()
+             out = net(img)[0]
+             parsing = out.squeeze(0).cpu().numpy().argmax(0)
+
+             vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))
+
+
+ if __name__ == "__main__":
+     setup_logger('./res')
+     evaluate()
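For reference, a minimal way to drive `evaluate()` with the checkpoint shipped in this commit — assuming `79999_iter.pth` has been copied into `res/cp/`, which is where `evaluate()` looks, and a CUDA device is available:

```python
# Sketch: run face parsing on a folder of images with the bundled checkpoint.
from evaluate import evaluate
from logger import setup_logger

setup_logger('./res')
evaluate(respth='./res/test_res', dspth='./data', cp='79999_iter.pth')
```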
face-parse-bisent/face_dataset.py ADDED
@@ -0,0 +1,106 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ import torch
+ from torch.utils.data import Dataset
+ import torchvision.transforms as transforms
+
+ import os.path as osp
+ import os
+ from PIL import Image
+ import numpy as np
+ import json
+ import cv2
+
+ from transform import *
+
+
+ class FaceMask(Dataset):
+     def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs):
+         super(FaceMask, self).__init__(*args, **kwargs)
+         assert mode in ('train', 'val', 'test')
+         self.mode = mode
+         self.ignore_lb = 255
+         self.rootpth = rootpth
+
+         self.imgs = os.listdir(os.path.join(self.rootpth, 'CelebA-HQ-img'))
+
+         # pre-processing
+         self.to_tensor = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+         ])
+         self.trans_train = Compose([
+             ColorJitter(
+                 brightness=0.5,
+                 contrast=0.5,
+                 saturation=0.5),
+             HorizontalFlip(),
+             RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
+             RandomCrop(cropsize)
+         ])
+
+     def __getitem__(self, idx):
+         impth = self.imgs[idx]
+         img = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', impth))
+         img = img.resize((512, 512), Image.BILINEAR)
+         label = Image.open(osp.join(self.rootpth, 'mask', impth[:-3]+'png')).convert('P')
+         # print(np.unique(np.array(label)))
+         if self.mode == 'train':
+             im_lb = dict(im=img, lb=label)
+             im_lb = self.trans_train(im_lb)
+             img, label = im_lb['im'], im_lb['lb']
+         img = self.to_tensor(img)
+         label = np.array(label).astype(np.int64)[np.newaxis, :]
+         return img, label
+
+     def __len__(self):
+         return len(self.imgs)
+
+
+ if __name__ == "__main__":
+     face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
+     face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
+     mask_path = '/home/zll/data/CelebAMask-HQ/mask'
+     counter = 0
+     total = 0
+     for i in range(15):
+         # files = os.listdir(osp.join(face_sep_mask, str(i)))
+
+         atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
+                 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
+
+         for j in range(i*2000, (i+1)*2000):
+
+             mask = np.zeros((512, 512))
+
+             for l, att in enumerate(atts, 1):
+                 total += 1
+                 file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
+                 path = osp.join(face_sep_mask, str(i), file_name)
+
+                 if os.path.exists(path):
+                     counter += 1
+                     sep_mask = np.array(Image.open(path).convert('P'))
+                     # print(np.unique(sep_mask))
+
+                     mask[sep_mask == 225] = l
+             cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
+             print(j)
+
+     print(counter, total)
face-parse-bisent/hair.png ADDED
face-parse-bisent/logger.py ADDED
@@ -0,0 +1,23 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+
+ import os.path as osp
+ import time
+ import sys
+ import logging
+
+ import torch.distributed as dist
+
+
+ def setup_logger(logpth):
+     logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
+     logfile = osp.join(logpth, logfile)
+     FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
+     log_level = logging.INFO
+     if dist.is_initialized() and not dist.get_rank()==0:
+         log_level = logging.ERROR
+     logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
+     logging.root.addHandler(logging.StreamHandler())
face-parse-bisent/loss.py ADDED
@@ -0,0 +1,75 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ import numpy as np
+
+
+ class OhemCELoss(nn.Module):
+     def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
+         super(OhemCELoss, self).__init__()
+         self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
+         self.n_min = n_min
+         self.ignore_lb = ignore_lb
+         self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
+
+     def forward(self, logits, labels):
+         N, C, H, W = logits.size()
+         loss = self.criteria(logits, labels).view(-1)
+         loss, _ = torch.sort(loss, descending=True)
+         if loss[self.n_min] > self.thresh:
+             loss = loss[loss>self.thresh]
+         else:
+             loss = loss[:self.n_min]
+         return torch.mean(loss)
+
+
+ class SoftmaxFocalLoss(nn.Module):
+     def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
+         super(SoftmaxFocalLoss, self).__init__()
+         self.gamma = gamma
+         self.nll = nn.NLLLoss(ignore_index=ignore_lb)
+
+     def forward(self, logits, labels):
+         scores = F.softmax(logits, dim=1)
+         factor = torch.pow(1.-scores, self.gamma)
+         log_score = F.log_softmax(logits, dim=1)
+         log_score = factor * log_score
+         loss = self.nll(log_score, labels)
+         return loss
+
+
+ if __name__ == '__main__':
+     torch.manual_seed(15)
+     criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
+     criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
+     net1 = nn.Sequential(
+         nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
+     )
+     net1.cuda()
+     net1.train()
+     net2 = nn.Sequential(
+         nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
+     )
+     net2.cuda()
+     net2.train()
+
+     with torch.no_grad():
+         inten = torch.randn(16, 3, 20, 20).cuda()
+         lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
+         lbs[1, :, :] = 255
+
+     logits1 = net1(inten)
+     logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
+     logits2 = net2(inten)
+     logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')
+
+     loss1 = criteria1(logits1, lbs)
+     loss2 = criteria2(logits2, lbs)
+     loss = loss1 + loss2
+     print(loss.detach().cpu())
+     loss.backward()
face-parse-bisent/makeup.py ADDED
@@ -0,0 +1,130 @@
+ import cv2
+ import os
+ import numpy as np
+ from skimage.filters import gaussian
+
+
+ def sharpen(img):
+     img = img * 1.0
+     gauss_out = gaussian(img, sigma=5, multichannel=True)
+
+     alpha = 1.5
+     img_out = (img - gauss_out) * alpha + img
+
+     img_out = img_out / 255.0
+
+     mask_1 = img_out < 0
+     mask_2 = img_out > 1
+
+     img_out = img_out * (1 - mask_1)
+     img_out = img_out * (1 - mask_2) + mask_2
+     img_out = np.clip(img_out, 0, 1)
+     img_out = img_out * 255
+     return np.array(img_out, dtype=np.uint8)
+
+
+ def hair(image, parsing, part=17, color=[230, 50, 20]):
+     b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
+     tar_color = np.zeros_like(image)
+     tar_color[:, :, 0] = b
+     tar_color[:, :, 1] = g
+     tar_color[:, :, 2] = r
+
+     image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+     tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV)
+
+     if part == 12 or part == 13:
+         image_hsv[:, :, 0:2] = tar_hsv[:, :, 0:2]
+     else:
+         image_hsv[:, :, 0:1] = tar_hsv[:, :, 0:1]
+
+     changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR)
+
+     if part == 17:
+         changed = sharpen(changed)
+
+     changed[parsing != part] = image[parsing != part]
+     # changed = cv2.resize(changed, (512, 512))
+     return changed
+
+ #
+ # def lip(image, parsing, part=17, color=[230, 50, 20]):
+ #     b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
+ #     tar_color = np.zeros_like(image)
+ #     tar_color[:, :, 0] = b
+ #     tar_color[:, :, 1] = g
+ #     tar_color[:, :, 2] = r
+ #
+ #     image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
+ #     il, ia, ib = cv2.split(image_lab)
+ #
+ #     tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab)
+ #     tl, ta, tb = cv2.split(tar_lab)
+ #
+ #     image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100)
+ #     image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128)
+ #     image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128)
+ #
+ #
+ #     changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR)
+ #
+ #     if part == 17:
+ #         changed = sharpen(changed)
+ #
+ #     changed[parsing != part] = image[parsing != part]
+ #     # changed = cv2.resize(changed, (512, 512))
+ #     return changed
+
+
+ if __name__ == '__main__':
+     # 1  face
+     # 10 nose
+     # 11 teeth
+     # 12 upper lip
+     # 13 lower lip
+     # 17 hair
+     num = 116
+     table = {
+         'hair': 17,
+         'upper_lip': 12,
+         'lower_lip': 13
+     }
+     image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num)
+     parsing_path = 'res/test_res/{}.png'.format(num)
+
+     image = cv2.imread(image_path)
+     ori = image.copy()
+     parsing = np.array(cv2.imread(parsing_path, 0))
+     parsing = cv2.resize(parsing, image.shape[0:2], interpolation=cv2.INTER_NEAREST)
+
+     parts = [table['hair'], table['upper_lip'], table['lower_lip']]
+     # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]]
+     colors = [[100, 200, 100]]
+     for part, color in zip(parts, colors):
+         image = hair(image, parsing, part, color)
+     cv2.imwrite('res/makeup/116_ori.png', cv2.resize(ori, (512, 512)))
+     cv2.imwrite('res/makeup/116_2.png', cv2.resize(image, (512, 512)))
+
+     cv2.imshow('image', cv2.resize(ori, (512, 512)))
+     cv2.imshow('color', cv2.resize(image, (512, 512)))
+
+     # cv2.imshow('image', ori)
+     # cv2.imshow('color', image)
+
+     cv2.waitKey(0)
+     cv2.destroyAllWindows()
face-parse-bisent/makeup/116_1.png ADDED
face-parse-bisent/makeup/116_3.png ADDED
face-parse-bisent/makeup/116_lip_ori.png ADDED
face-parse-bisent/makeup/116_ori.png ADDED
face-parse-bisent/model.py ADDED
@@ -0,0 +1,283 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchvision
+
+ from resnet import Resnet18
+ # from modules.bn import InPlaceABNSync as BatchNorm2d
+
+
+ class ConvBNReLU(nn.Module):
+     def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
+         super(ConvBNReLU, self).__init__()
+         self.conv = nn.Conv2d(in_chan,
+                               out_chan,
+                               kernel_size = ks,
+                               stride = stride,
+                               padding = padding,
+                               bias = False)
+         self.bn = nn.BatchNorm2d(out_chan)
+         self.init_weight()
+
+     def forward(self, x):
+         x = self.conv(x)
+         x = F.relu(self.bn(x))
+         return x
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+
+ class BiSeNetOutput(nn.Module):
+     def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
+         super(BiSeNetOutput, self).__init__()
+         self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
+         self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
+         self.init_weight()
+
+     def forward(self, x):
+         x = self.conv(x)
+         x = self.conv_out(x)
+         return x
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+     def get_params(self):
+         wd_params, nowd_params = [], []
+         for name, module in self.named_modules():
+             if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                 wd_params.append(module.weight)
+                 if not module.bias is None:
+                     nowd_params.append(module.bias)
+             elif isinstance(module, nn.BatchNorm2d):
+                 nowd_params += list(module.parameters())
+         return wd_params, nowd_params
+
+
+ class AttentionRefinementModule(nn.Module):
+     def __init__(self, in_chan, out_chan, *args, **kwargs):
+         super(AttentionRefinementModule, self).__init__()
+         self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
+         self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
+         self.bn_atten = nn.BatchNorm2d(out_chan)
+         self.sigmoid_atten = nn.Sigmoid()
+         self.init_weight()
+
+     def forward(self, x):
+         feat = self.conv(x)
+         atten = F.avg_pool2d(feat, feat.size()[2:])
+         atten = self.conv_atten(atten)
+         atten = self.bn_atten(atten)
+         atten = self.sigmoid_atten(atten)
+         out = torch.mul(feat, atten)
+         return out
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+
+ class ContextPath(nn.Module):
+     def __init__(self, *args, **kwargs):
+         super(ContextPath, self).__init__()
+         self.resnet = Resnet18()
+         self.arm16 = AttentionRefinementModule(256, 128)
+         self.arm32 = AttentionRefinementModule(512, 128)
+         self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+         self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+         self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
+
+         self.init_weight()
+
+     def forward(self, x):
+         H0, W0 = x.size()[2:]
+         feat8, feat16, feat32 = self.resnet(x)
+         H8, W8 = feat8.size()[2:]
+         H16, W16 = feat16.size()[2:]
+         H32, W32 = feat32.size()[2:]
+
+         avg = F.avg_pool2d(feat32, feat32.size()[2:])
+         avg = self.conv_avg(avg)
+         avg_up = F.interpolate(avg, (H32, W32), mode='nearest')
+
+         feat32_arm = self.arm32(feat32)
+         feat32_sum = feat32_arm + avg_up
+         feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
+         feat32_up = self.conv_head32(feat32_up)
+
+         feat16_arm = self.arm16(feat16)
+         feat16_sum = feat16_arm + feat32_up
+         feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
+         feat16_up = self.conv_head16(feat16_up)
+
+         return feat8, feat16_up, feat32_up  # x8, x8, x16
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+     def get_params(self):
+         wd_params, nowd_params = [], []
+         for name, module in self.named_modules():
+             if isinstance(module, (nn.Linear, nn.Conv2d)):
+                 wd_params.append(module.weight)
+                 if not module.bias is None:
+                     nowd_params.append(module.bias)
+             elif isinstance(module, nn.BatchNorm2d):
+                 nowd_params += list(module.parameters())
+         return wd_params, nowd_params
+
+
+ ### This is not used, since I replace this with the resnet feature with the same size
+ class SpatialPath(nn.Module):
+     def __init__(self, *args, **kwargs):
+         super(SpatialPath, self).__init__()
+         self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
+         self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+         self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+         self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
+         self.init_weight()
+
+     def forward(self, x):
+         feat = self.conv1(x)
+         feat = self.conv2(feat)
+         feat = self.conv3(feat)
+         feat = self.conv_out(feat)
+         return feat
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+     def get_params(self):
+         wd_params, nowd_params = [], []
+         for name, module in self.named_modules():
+             if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                 wd_params.append(module.weight)
+                 if not module.bias is None:
+                     nowd_params.append(module.bias)
+             elif isinstance(module, nn.BatchNorm2d):
+                 nowd_params += list(module.parameters())
+         return wd_params, nowd_params
+
+
+ class FeatureFusionModule(nn.Module):
+     def __init__(self, in_chan, out_chan, *args, **kwargs):
+         super(FeatureFusionModule, self).__init__()
+         self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
+         self.conv1 = nn.Conv2d(out_chan,
+                                out_chan//4,
+                                kernel_size = 1,
+                                stride = 1,
+                                padding = 0,
+                                bias = False)
+         self.conv2 = nn.Conv2d(out_chan//4,
+                                out_chan,
+                                kernel_size = 1,
+                                stride = 1,
+                                padding = 0,
+                                bias = False)
+         self.relu = nn.ReLU(inplace=True)
+         self.sigmoid = nn.Sigmoid()
+         self.init_weight()
+
+     def forward(self, fsp, fcp):
+         fcat = torch.cat([fsp, fcp], dim=1)
+         feat = self.convblk(fcat)
+         atten = F.avg_pool2d(feat, feat.size()[2:])
+         atten = self.conv1(atten)
+         atten = self.relu(atten)
+         atten = self.conv2(atten)
+         atten = self.sigmoid(atten)
+         feat_atten = torch.mul(feat, atten)
+         feat_out = feat_atten + feat
+         return feat_out
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+     def get_params(self):
+         wd_params, nowd_params = [], []
+         for name, module in self.named_modules():
+             if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                 wd_params.append(module.weight)
+                 if not module.bias is None:
+                     nowd_params.append(module.bias)
+             elif isinstance(module, nn.BatchNorm2d):
+                 nowd_params += list(module.parameters())
+         return wd_params, nowd_params
+
+
+ class BiSeNet(nn.Module):
+     def __init__(self, n_classes, *args, **kwargs):
+         super(BiSeNet, self).__init__()
+         self.cp = ContextPath()
+         ## here self.sp is deleted
+         self.ffm = FeatureFusionModule(256, 256)
+         self.conv_out = BiSeNetOutput(256, 256, n_classes)
+         self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
+         self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
+         self.init_weight()
+
+     def forward(self, x):
+         H, W = x.size()[2:]
+         feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
+         feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
+         feat_fuse = self.ffm(feat_sp, feat_cp8)
+
+         feat_out = self.conv_out(feat_fuse)
+         feat_out16 = self.conv_out16(feat_cp8)
+         feat_out32 = self.conv_out32(feat_cp16)
+
+         feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
+         feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
+         feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
+         return feat_out, feat_out16, feat_out32
+
+     def init_weight(self):
+         for ly in self.children():
+             if isinstance(ly, nn.Conv2d):
+                 nn.init.kaiming_normal_(ly.weight, a=1)
+                 if not ly.bias is None: nn.init.constant_(ly.bias, 0)
+
+     def get_params(self):
+         wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
+         for name, child in self.named_children():
+             child_wd_params, child_nowd_params = child.get_params()
+             if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
+                 lr_mul_wd_params += child_wd_params
+                 lr_mul_nowd_params += child_nowd_params
+             else:
+                 wd_params += child_wd_params
+                 nowd_params += child_nowd_params
+         return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
+
+
+ if __name__ == "__main__":
+     net = BiSeNet(19)
+     net.cuda()
+     net.eval()
+     in_ten = torch.randn(16, 3, 640, 480).cuda()
+     out, out16, out32 = net(in_ten)
+     print(out.shape)
+
+     net.get_params()
face-parse-bisent/modules/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .bn import ABN, InPlaceABN, InPlaceABNSync
+ from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
+ from .misc import GlobalAvgPool2d, SingleGPU
+ from .residual import IdentityResidualBlock
+ from .dense import DenseModule
face-parse-bisent/modules/bn.py ADDED
@@ -0,0 +1,130 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as functional
+
+ try:
+     from queue import Queue
+ except ImportError:
+     from Queue import Queue
+
+ from .functions import *
+
+
+ class ABN(nn.Module):
+     """Activated Batch Normalization
+
+     This gathers a `BatchNorm2d` and an activation function in a single module
+     """
+
+     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
+         """Creates an Activated Batch Normalization module
+
+         Parameters
+         ----------
+         num_features : int
+             Number of feature channels in the input and output.
+         eps : float
+             Small constant to prevent numerical issues.
+         momentum : float
+             Momentum factor applied to compute running statistics.
+         affine : bool
+             If `True` apply learned scale and shift transformation after normalization.
+         activation : str
+             Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
+         slope : float
+             Negative slope for the `leaky_relu` activation.
+         """
+         super(ABN, self).__init__()
+         self.num_features = num_features
+         self.affine = affine
+         self.eps = eps
+         self.momentum = momentum
+         self.activation = activation
+         self.slope = slope
+         if self.affine:
+             self.weight = nn.Parameter(torch.ones(num_features))
+             self.bias = nn.Parameter(torch.zeros(num_features))
+         else:
+             self.register_parameter('weight', None)
+             self.register_parameter('bias', None)
+         self.register_buffer('running_mean', torch.zeros(num_features))
+         self.register_buffer('running_var', torch.ones(num_features))
+         self.reset_parameters()
+
+     def reset_parameters(self):
+         nn.init.constant_(self.running_mean, 0)
+         nn.init.constant_(self.running_var, 1)
+         if self.affine:
+             nn.init.constant_(self.weight, 1)
+             nn.init.constant_(self.bias, 0)
+
+     def forward(self, x):
+         x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
+                                   self.training, self.momentum, self.eps)
+
+         if self.activation == ACT_RELU:
+             return functional.relu(x, inplace=True)
+         elif self.activation == ACT_LEAKY_RELU:
+             return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
+         elif self.activation == ACT_ELU:
+             return functional.elu(x, inplace=True)
+         else:
+             return x
+
+     def __repr__(self):
+         rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
+               ' affine={affine}, activation={activation}'
+         if self.activation == "leaky_relu":
+             rep += ', slope={slope})'
+         else:
+             rep += ')'
+         return rep.format(name=self.__class__.__name__, **self.__dict__)
+
+
+ class InPlaceABN(ABN):
+     """InPlace Activated Batch Normalization"""
+
+     def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
+         """Creates an InPlace Activated Batch Normalization module
+
+         Parameters
+         ----------
+         num_features : int
+             Number of feature channels in the input and output.
+         eps : float
+             Small constant to prevent numerical issues.
+         momentum : float
+             Momentum factor applied to compute running statistics.
+         affine : bool
+             If `True` apply learned scale and shift transformation after normalization.
+         activation : str
+             Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
+         slope : float
+             Negative slope for the `leaky_relu` activation.
+         """
+         super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)
+
+     def forward(self, x):
+         return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
+                            self.training, self.momentum, self.eps, self.activation, self.slope)
+
+
+ class InPlaceABNSync(ABN):
+     """InPlace Activated Batch Normalization with cross-GPU synchronization
+
+     This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
+     """
+
+     def forward(self, x):
+         return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
+                                 self.training, self.momentum, self.eps, self.activation, self.slope)
+
+     def __repr__(self):
+         rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
+               ' affine={affine}, activation={activation}'
+         if self.activation == "leaky_relu":
+             rep += ', slope={slope})'
+         else:
+             rep += ')'
+         return rep.format(name=self.__class__.__name__, **self.__dict__)
face-parse-bisent/modules/deeplab.py ADDED
@@ -0,0 +1,84 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as functional
+
+ from models._util import try_index
+ from .bn import ABN
+
+
+ class DeeplabV3(nn.Module):
+     def __init__(self,
+                  in_channels,
+                  out_channels,
+                  hidden_channels=256,
+                  dilations=(12, 24, 36),
+                  norm_act=ABN,
+                  pooling_size=None):
+         super(DeeplabV3, self).__init__()
+         self.pooling_size = pooling_size
+
+         self.map_convs = nn.ModuleList([
+             nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
+             nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
+             nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
+             nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
+         ])
+         self.map_bn = norm_act(hidden_channels * 4)
+
+         self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
+         self.global_pooling_bn = norm_act(hidden_channels)
+
+         self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
+         self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
+         self.red_bn = norm_act(out_channels)
+
+         self.reset_parameters(self.map_bn.activation, self.map_bn.slope)
+
+     def reset_parameters(self, activation, slope):
+         gain = nn.init.calculate_gain(activation, slope)
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.xavier_normal_(m.weight.data, gain)
+                 if hasattr(m, "bias") and m.bias is not None:
+                     nn.init.constant_(m.bias, 0)
+             elif isinstance(m, ABN):
+                 if hasattr(m, "weight") and m.weight is not None:
+                     nn.init.constant_(m.weight, 1)
+                 if hasattr(m, "bias") and m.bias is not None:
+                     nn.init.constant_(m.bias, 0)
+
+     def forward(self, x):
+         # Map convolutions
+         out = torch.cat([m(x) for m in self.map_convs], dim=1)
+         out = self.map_bn(out)
+         out = self.red_conv(out)
+
+         # Global pooling
+         pool = self._global_pooling(x)
+         pool = self.global_pooling_conv(pool)
+         pool = self.global_pooling_bn(pool)
+         pool = self.pool_red_conv(pool)
+         if self.training or self.pooling_size is None:
+             pool = pool.repeat(1, 1, x.size(2), x.size(3))
+
+         out += pool
+         out = self.red_bn(out)
+         return out
+
+     def _global_pooling(self, x):
+         if self.training or self.pooling_size is None:
+             pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
+             pool = pool.view(x.size(0), x.size(1), 1, 1)
+         else:
+             pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
+                             min(try_index(self.pooling_size, 1), x.shape[3]))
+             padding = (
+                 (pooling_size[1] - 1) // 2,
+                 (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
+                 (pooling_size[0] - 1) // 2,
+                 (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
+             )
+
+             pool = functional.avg_pool2d(x, pooling_size, stride=1)
+             pool = functional.pad(pool, pad=padding, mode="replicate")
+         return pool
face-parse-bisent/modules/dense.py ADDED
@@ -0,0 +1,42 @@
+ from collections import OrderedDict
+
+ import torch
+ import torch.nn as nn
+
+ from .bn import ABN
+
+
+ class DenseModule(nn.Module):
+     def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
+         super(DenseModule, self).__init__()
+         self.in_channels = in_channels
+         self.growth = growth
+         self.layers = layers
+
+         self.convs1 = nn.ModuleList()
+         self.convs3 = nn.ModuleList()
+         for i in range(self.layers):
+             self.convs1.append(nn.Sequential(OrderedDict([
+                 ("bn", norm_act(in_channels)),
+                 ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
+             ])))
+             self.convs3.append(nn.Sequential(OrderedDict([
+                 ("bn", norm_act(self.growth * bottleneck_factor)),
+                 ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
+                                    dilation=dilation))
+             ])))
+             in_channels += self.growth
+
+     @property
+     def out_channels(self):
+         return self.in_channels + self.growth * self.layers
+
+     def forward(self, x):
+         inputs = [x]
+         for i in range(self.layers):
+             x = torch.cat(inputs, dim=1)
+             x = self.convs1[i](x)
+             x = self.convs3[i](x)
+             inputs += [x]
+
+         return torch.cat(inputs, dim=1)
face-parse-bisent/modules/functions.py ADDED
@@ -0,0 +1,234 @@
+ from os import path
+ import torch
+ import torch.distributed as dist
+ import torch.autograd as autograd
+ import torch.cuda.comm as comm
+ from torch.autograd.function import once_differentiable
+ from torch.utils.cpp_extension import load
+
+ _src_path = path.join(path.dirname(path.abspath(__file__)), "src")
+ _backend = load(name="inplace_abn",
+                 extra_cflags=["-O3"],
+                 sources=[path.join(_src_path, f) for f in [
+                     "inplace_abn.cpp",
+                     "inplace_abn_cpu.cpp",
+                     "inplace_abn_cuda.cu",
+                     "inplace_abn_cuda_half.cu"
+                 ]],
+                 extra_cuda_cflags=["--expt-extended-lambda"])
+
+ # Activation names
+ ACT_RELU = "relu"
+ ACT_LEAKY_RELU = "leaky_relu"
+ ACT_ELU = "elu"
+ ACT_NONE = "none"
+
+
+ def _check(fn, *args, **kwargs):
+     success = fn(*args, **kwargs)
+     if not success:
+         raise RuntimeError("CUDA Error encountered in {}".format(fn))
+
+
+ def _broadcast_shape(x):
+     out_size = []
+     for i, s in enumerate(x.size()):
+         if i != 1:
+             out_size.append(1)
+         else:
+             out_size.append(s)
+     return out_size
+
+
+ def _reduce(x):
+     if len(x.size()) == 2:
+         return x.sum(dim=0)
+     else:
+         n, c = x.size()[0:2]
+         return x.contiguous().view((n, c, -1)).sum(2).sum(0)
+
+
+ def _count_samples(x):
+     count = 1
+     for i, s in enumerate(x.size()):
+         if i != 1:
+             count *= s
+     return count
+
+
+ def _act_forward(ctx, x):
+     if ctx.activation == ACT_LEAKY_RELU:
+         _backend.leaky_relu_forward(x, ctx.slope)
+     elif ctx.activation == ACT_ELU:
+         _backend.elu_forward(x)
+     elif ctx.activation == ACT_NONE:
+         pass
+
+
+ def _act_backward(ctx, x, dx):
+     if ctx.activation == ACT_LEAKY_RELU:
+         _backend.leaky_relu_backward(x, dx, ctx.slope)
+     elif ctx.activation == ACT_ELU:
+         _backend.elu_backward(x, dx)
+     elif ctx.activation == ACT_NONE:
+         pass
+
+
+ class InPlaceABN(autograd.Function):
+     @staticmethod
+     def forward(ctx, x, weight, bias, running_mean, running_var,
+                 training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
+         # Save context
+         ctx.training = training
+         ctx.momentum = momentum
+         ctx.eps = eps
+         ctx.activation = activation
+         ctx.slope = slope
+         ctx.affine = weight is not None and bias is not None
+
+         # Prepare inputs
+         count = _count_samples(x)
+         x = x.contiguous()
+         weight = weight.contiguous() if ctx.affine else x.new_empty(0)
+         bias = bias.contiguous() if ctx.affine else x.new_empty(0)
+
+         if ctx.training:
+             mean, var = _backend.mean_var(x)
+
+             # Update running stats
+             running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
+             running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
+
+             # Mark in-place modified tensors
+             ctx.mark_dirty(x, running_mean, running_var)
+         else:
+             mean, var = running_mean.contiguous(), running_var.contiguous()
+             ctx.mark_dirty(x)
+
+         # BN forward + activation
+         _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
+         _act_forward(ctx, x)
+
+         # Output
+         ctx.var = var
+         ctx.save_for_backward(x, var, weight, bias)
+         return x
+
+     @staticmethod
+     @once_differentiable
+     def backward(ctx, dz):
+         z, var, weight, bias = ctx.saved_tensors
+         dz = dz.contiguous()
+
+         # Undo activation
+         _act_backward(ctx, z, dz)
+
+         if ctx.training:
+             edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+         else:
+             # TODO: implement simplified CUDA backward for inference mode
+             edz = dz.new_zeros(dz.size(1))
+             eydz = dz.new_zeros(dz.size(1))
+
+         dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+         dweight = eydz * weight.sign() if ctx.affine else None
+         dbias = edz if ctx.affine else None
+
+         return dx, dweight, dbias, None, None, None, None, None, None, None
+
+
+ class InPlaceABNSync(autograd.Function):
+     @classmethod
+     def forward(cls, ctx, x, weight, bias, running_mean, running_var,
+                 training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
+         # Save context
+         ctx.training = training
+         ctx.momentum = momentum
+         ctx.eps = eps
+         ctx.activation = activation
+         ctx.slope = slope
+         ctx.affine = weight is not None and bias is not None
+
+         # Prepare inputs
+         ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1
+
+         # count = _count_samples(x)
+         batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)
+
+         x = x.contiguous()
+         weight = weight.contiguous() if ctx.affine else x.new_empty(0)
+         bias = bias.contiguous() if ctx.affine else x.new_empty(0)
+
+         if ctx.training:
+             mean, var = _backend.mean_var(x)
+             if ctx.world_size > 1:
+                 # get global batch size
+                 if equal_batches:
+                     batch_size *= ctx.world_size
+                 else:
+                     dist.all_reduce(batch_size, dist.ReduceOp.SUM)
+
+                 ctx.factor = x.shape[0] / float(batch_size.item())
+
+                 mean_all = mean.clone() * ctx.factor
+                 dist.all_reduce(mean_all, dist.ReduceOp.SUM)
+
+                 var_all = (var + (mean - mean_all) ** 2) * ctx.factor
+                 dist.all_reduce(var_all, dist.ReduceOp.SUM)
+
+                 mean = mean_all
+                 var = var_all
+
+             # Update running stats
+             running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
+             count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
+             running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))
+
+             # Mark in-place modified tensors
+             ctx.mark_dirty(x, running_mean, running_var)
+         else:
+             mean, var = running_mean.contiguous(), running_var.contiguous()
+             ctx.mark_dirty(x)
+
+         # BN forward + activation
+         _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
+         _act_forward(ctx, x)
+
+         # Output
+         ctx.var = var
+         ctx.save_for_backward(x, var, weight, bias)
+         return x
+
+     @staticmethod
+     @once_differentiable
+     def backward(ctx, dz):
+         z, var, weight, bias = ctx.saved_tensors
+         dz = dz.contiguous()
+
+         # Undo activation
+         _act_backward(ctx, z, dz)
+
+         if ctx.training:
+             edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+             edz_local = edz.clone()
+             eydz_local = eydz.clone()
+
+             if ctx.world_size > 1:
+                 edz *= ctx.factor
+                 dist.all_reduce(edz, dist.ReduceOp.SUM)
+
+                 eydz *= ctx.factor
+                 dist.all_reduce(eydz, dist.ReduceOp.SUM)
+         else:
+             edz_local = edz = dz.new_zeros(dz.size(1))
+             eydz_local = eydz = dz.new_zeros(dz.size(1))
+
+         dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+         dweight = eydz_local * weight.sign() if ctx.affine else None
+         dbias = edz_local if ctx.affine else None
+
+         return dx, dweight, dbias, None, None, None, None, None, None, None
+
+
+ inplace_abn = InPlaceABN.apply
+ inplace_abn_sync = InPlaceABNSync.apply
+
+ __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
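Note that `functions.py` JIT-compiles the `inplace_abn` C++/CUDA extension at import time via `torch.utils.cpp_extension.load`, so merely importing the `modules` package requires a working compiler toolchain (and typically `ninja`) plus the CUDA headers. A small smoke test, assuming a CUDA device and build toolchain are available — a sketch, not part of this commit:

```python
import torch
from modules import InPlaceABN  # first import triggers the JIT build of inplace_abn

abn = InPlaceABN(64).cuda()  # fused BN + leaky_relu, computed in place
x = torch.randn(2, 64, 32, 32, device='cuda')
y = abn(x)
y.mean().backward()          # exercises the custom backward; grads land on abn.weight/abn.bias
print(y.shape, abn.weight.grad.shape)
```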
face-parse-bisent/modules/misc.py ADDED
@@ -0,0 +1,21 @@
+ import torch.nn as nn
+ import torch
+ import torch.distributed as dist
+
+
+ class GlobalAvgPool2d(nn.Module):
+     def __init__(self):
+         """Global average pooling over the input's spatial dimensions"""
+         super(GlobalAvgPool2d, self).__init__()
+
+     def forward(self, inputs):
+         in_size = inputs.size()
+         return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)
+
+
+ class SingleGPU(nn.Module):
+     def __init__(self, module):
+         super(SingleGPU, self).__init__()
+         self.module = module
+
+     def forward(self, input):
+         return self.module(input.cuda(non_blocking=True))
face-parse-bisent/modules/residual.py ADDED
@@ -0,0 +1,88 @@
+ from collections import OrderedDict
+
+ import torch.nn as nn
+
+ from .bn import ABN
+
+
+ class IdentityResidualBlock(nn.Module):
+     def __init__(self,
+                  in_channels,
+                  channels,
+                  stride=1,
+                  dilation=1,
+                  groups=1,
+                  norm_act=ABN,
+                  dropout=None):
+         """Configurable identity-mapping residual block
+
+         Parameters
+         ----------
+         in_channels : int
+             Number of input channels.
+         channels : list of int
+             Number of channels in the internal feature maps. Can either have two or three elements: if two, construct
+             a standard residual block with two `3 x 3` convolutions; if three, construct a bottleneck block with
+             `1 x 1`, then `3 x 3`, then `1 x 1` convolutions.
+         stride : int
+             Stride of the first `3 x 3` convolution
+         dilation : int
+             Dilation to apply to the `3 x 3` convolutions.
+         groups : int
+             Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
+             bottleneck blocks.
+         norm_act : callable
+             Function to create normalization / activation Module.
+         dropout: callable
+             Function to create Dropout Module.
+         """
+         super(IdentityResidualBlock, self).__init__()
+
+         # Check parameters for inconsistencies
+         if len(channels) != 2 and len(channels) != 3:
+             raise ValueError("channels must contain either two or three values")
+         if len(channels) == 2 and groups != 1:
+             raise ValueError("groups > 1 are only valid if len(channels) == 3")
+
+         is_bottleneck = len(channels) == 3
+         need_proj_conv = stride != 1 or in_channels != channels[-1]
+
+         self.bn1 = norm_act(in_channels)
+         if not is_bottleneck:
+             layers = [
+                 ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
+                                     dilation=dilation)),
+                 ("bn2", norm_act(channels[0])),
+                 ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
+                                     dilation=dilation))
+             ]
+             if dropout is not None:
+                 layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
+         else:
+             layers = [
+                 ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
+                 ("bn2", norm_act(channels[0])),
+                 ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
+                                     groups=groups, dilation=dilation)),
+                 ("bn3", norm_act(channels[1])),
+                 ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
+             ]
+             if dropout is not None:
+                 layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
+         self.convs = nn.Sequential(OrderedDict(layers))
+
+         if need_proj_conv:
+             self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)
+
+     def forward(self, x):
+         if hasattr(self, "proj_conv"):
+             bn1 = self.bn1(x)
+             shortcut = self.proj_conv(bn1)
+         else:
+             shortcut = x.clone()
+             bn1 = self.bn1(x)
+
+         out = self.convs(bn1)
+         out.add_(shortcut)
+
+         return out
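A short usage sketch for the two `IdentityResidualBlock` configurations (two-element `channels` for the basic variant, three-element for the bottleneck); this assumes the `face-parse-bisent` directory is on `sys.path` so the package import resolves:

import torch
from modules.residual import IdentityResidualBlock

basic = IdentityResidualBlock(64, [64, 64])            # two 3x3 convolutions
bottleneck = IdentityResidualBlock(64, [32, 32, 128],  # 1x1 -> 3x3 -> 1x1, projection shortcut
                                   stride=2, groups=4)

x = torch.randn(1, 64, 56, 56)
print(basic(x).shape)        # torch.Size([1, 64, 56, 56])
print(bottleneck(x).shape)   # torch.Size([1, 128, 28, 28])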
face-parse-bisent/modules/src/checks.h ADDED
@@ -0,0 +1,15 @@
+ #pragma once
+
+ #include <ATen/ATen.h>
+
+ // Define AT_CHECK for older versions of ATen where the same function was called AT_ASSERT
+ #ifndef AT_CHECK
+ #define AT_CHECK AT_ASSERT
+ #endif
+
+ #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
+ #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
+ #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")
+
+ #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
+ #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
face-parse-bisent/modules/src/inplace_abn.cpp ADDED
@@ -0,0 +1,95 @@
+ #include <torch/extension.h>
+
+ #include <vector>
+
+ #include "inplace_abn.h"
+
+ std::vector<at::Tensor> mean_var(at::Tensor x) {
+   if (x.is_cuda()) {
+     if (x.type().scalarType() == at::ScalarType::Half) {
+       return mean_var_cuda_h(x);
+     } else {
+       return mean_var_cuda(x);
+     }
+   } else {
+     return mean_var_cpu(x);
+   }
+ }
+
+ at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                    bool affine, float eps) {
+   if (x.is_cuda()) {
+     if (x.type().scalarType() == at::ScalarType::Half) {
+       return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
+     } else {
+       return forward_cuda(x, mean, var, weight, bias, affine, eps);
+     }
+   } else {
+     return forward_cpu(x, mean, var, weight, bias, affine, eps);
+   }
+ }
+
+ std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                  bool affine, float eps) {
+   if (z.is_cuda()) {
+     if (z.type().scalarType() == at::ScalarType::Half) {
+       return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
+     } else {
+       return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
+     }
+   } else {
+     return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
+   }
+ }
+
+ at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                     at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+   if (z.is_cuda()) {
+     if (z.type().scalarType() == at::ScalarType::Half) {
+       return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
+     } else {
+       return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
+     }
+   } else {
+     return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
+   }
+ }
+
+ void leaky_relu_forward(at::Tensor z, float slope) {
+   at::leaky_relu_(z, slope);
+ }
+
+ void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
+   if (z.is_cuda()) {
+     if (z.type().scalarType() == at::ScalarType::Half) {
+       return leaky_relu_backward_cuda_h(z, dz, slope);
+     } else {
+       return leaky_relu_backward_cuda(z, dz, slope);
+     }
+   } else {
+     return leaky_relu_backward_cpu(z, dz, slope);
+   }
+ }
+
+ void elu_forward(at::Tensor z) {
+   at::elu_(z);
+ }
+
+ void elu_backward(at::Tensor z, at::Tensor dz) {
+   if (z.is_cuda()) {
+     return elu_backward_cuda(z, dz);
+   } else {
+     return elu_backward_cpu(z, dz);
+   }
+ }
+
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+   m.def("mean_var", &mean_var, "Mean and variance computation");
+   m.def("forward", &forward, "In-place forward computation");
+   m.def("edz_eydz", &edz_eydz, "First part of backward computation");
+   m.def("backward", &backward, "Second part of backward computation");
+   m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
+   m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
+   m.def("elu_forward", &elu_forward, "Elu forward computation");
+   m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
+ }
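These bindings compile into the `_backend` extension module used by `functions.py`. A hedged sketch of JIT-building it with `torch.utils.cpp_extension.load` (the source list is assumed from the `modules/src` files in this upload; the original repo may instead ship its own build step):

import torch
from torch.utils.cpp_extension import load

_backend = load(
    name="inplace_abn",
    sources=[
        "face-parse-bisent/modules/src/inplace_abn.cpp",
        "face-parse-bisent/modules/src/inplace_abn_cpu.cpp",
        "face-parse-bisent/modules/src/inplace_abn_cuda.cu",
        "face-parse-bisent/modules/src/inplace_abn_cuda_half.cu",
    ],
    extra_cflags=["-O3"],
    extra_cuda_cflags=["--expt-extended-lambda"],  # required by the __device__ lambdas in the .cu files
)

x = torch.randn(2, 3, 8, 8).cuda().contiguous()
mean, var = _backend.mean_var(x)   # per-channel statistics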
face-parse-bisent/modules/src/inplace_abn.h ADDED
@@ -0,0 +1,88 @@
+ #pragma once
+
+ #include <ATen/ATen.h>
+
+ #include <vector>
+
+ std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
+ std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
+ std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);
+
+ at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                        bool affine, float eps);
+ at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                         bool affine, float eps);
+ at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                           bool affine, float eps);
+
+ std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                      bool affine, float eps);
+ std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                       bool affine, float eps);
+ std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                         bool affine, float eps);
+
+ at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                         at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+ at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                          at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+ at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                            at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+
+ void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
+ void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
+ void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);
+
+ void elu_backward_cpu(at::Tensor z, at::Tensor dz);
+ void elu_backward_cuda(at::Tensor z, at::Tensor dz);
+
+ static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
+   num = x.size(0);
+   chn = x.size(1);
+   sp = 1;
+   for (int64_t i = 2; i < x.ndimension(); ++i)
+     sp *= x.size(i);
+ }
+
+ /*
+  * Specialized CUDA reduction functions for BN
+  */
+ #ifdef __CUDACC__
+
+ #include "utils/cuda.cuh"
+
+ template <typename T, typename Op>
+ __device__ T reduce(Op op, int plane, int N, int S) {
+   T sum = (T)0;
+   for (int batch = 0; batch < N; ++batch) {
+     for (int x = threadIdx.x; x < S; x += blockDim.x) {
+       sum += op(batch, plane, x);
+     }
+   }
+
+   // sum over NumThreads within a warp
+   sum = warpSum(sum);
+
+   // 'transpose', and reduce within warp again
+   __shared__ T shared[32];
+   __syncthreads();
+   if (threadIdx.x % WARP_SIZE == 0) {
+     shared[threadIdx.x / WARP_SIZE] = sum;
+   }
+   if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
+     // zero out the other entries in shared
+     shared[threadIdx.x] = (T)0;
+   }
+   __syncthreads();
+   if (threadIdx.x / WARP_SIZE == 0) {
+     sum = warpSum(shared[threadIdx.x]);
+     if (threadIdx.x == 0) {
+       shared[0] = sum;
+     }
+   }
+   __syncthreads();
+
+   // Everyone picks it up, should be broadcast into the whole gradInput
+   return shared[0];
+ }
+ #endif
face-parse-bisent/modules/src/inplace_abn_cpu.cpp ADDED
@@ -0,0 +1,119 @@
+ #include <ATen/ATen.h>
+
+ #include <vector>
+
+ #include "utils/checks.h"
+ #include "inplace_abn.h"
+
+ at::Tensor reduce_sum(at::Tensor x) {
+   if (x.ndimension() == 2) {
+     return x.sum(0);
+   } else {
+     auto x_view = x.view({x.size(0), x.size(1), -1});
+     return x_view.sum(-1).sum(0);
+   }
+ }
+
+ at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
+   if (x.ndimension() == 2) {
+     return v;
+   } else {
+     std::vector<int64_t> broadcast_size = {1, -1};
+     for (int64_t i = 2; i < x.ndimension(); ++i)
+       broadcast_size.push_back(1);
+
+     return v.view(broadcast_size);
+   }
+ }
+
+ int64_t count(at::Tensor x) {
+   int64_t count = x.size(0);
+   for (int64_t i = 2; i < x.ndimension(); ++i)
+     count *= x.size(i);
+
+   return count;
+ }
+
+ at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
+   if (affine) {
+     return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
+   } else {
+     return z;
+   }
+ }
+
+ std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
+   auto num = count(x);
+   auto mean = reduce_sum(x) / num;
+   auto diff = x - broadcast_to(mean, x);
+   auto var = reduce_sum(diff.pow(2)) / num;
+
+   return {mean, var};
+ }
+
+ at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                        bool affine, float eps) {
+   auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
+   auto mul = at::rsqrt(var + eps) * gamma;
+
+   x.sub_(broadcast_to(mean, x));
+   x.mul_(broadcast_to(mul, x));
+   if (affine) x.add_(broadcast_to(bias, x));
+
+   return x;
+ }
+
+ std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                      bool affine, float eps) {
+   auto edz = reduce_sum(dz);
+   auto y = invert_affine(z, weight, bias, affine, eps);
+   auto eydz = reduce_sum(y * dz);
+
+   return {edz, eydz};
+ }
+
+ at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+   auto y = invert_affine(z, weight, bias, affine, eps);
+   auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);
+
+   auto num = count(z);
+   auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
+   return dx;
+ }
+
+ void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
+   CHECK_CPU_INPUT(z);
+   CHECK_CPU_INPUT(dz);
+
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
+     int64_t count = z.numel();
+     auto *_z = z.data<scalar_t>();
+     auto *_dz = dz.data<scalar_t>();
+
+     for (int64_t i = 0; i < count; ++i) {
+       if (_z[i] < 0) {
+         _z[i] *= 1 / slope;
+         _dz[i] *= slope;
+       }
+     }
+   }));
+ }
+
+ void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
+   CHECK_CPU_INPUT(z);
+   CHECK_CPU_INPUT(dz);
+
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
+     int64_t count = z.numel();
+     auto *_z = z.data<scalar_t>();
+     auto *_dz = dz.data<scalar_t>();
+
+     for (int64_t i = 0; i < count; ++i) {
+       if (_z[i] < 0) {
+         // For y = elu(x) = exp(x) - 1 on the negative side: dy/dx = y + 1 and
+         // x = log1p(y), so the gradient must be rescaled before the in-place
+         // inversion of the activation (matching the CUDA implementation).
+         _dz[i] *= (_z[i] + 1.f);
+         _z[i] = log1p(_z[i]);
+       }
+     }
+   }));
+ }
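The closed form in `backward_cpu` is the standard batch-norm gradient expressed in terms of the saved output: with `y` the normalized activation recovered by `invert_affine`, `dx = (dz - E[dz] - y * E[y*dz]) * gamma / sqrt(var + eps)`. A quick autograd cross-check of that formula (a sketch; it uses a plain positive `w` in place of the `abs(weight) + eps` re-parameterization used by this code):

import torch

x = torch.randn(4, 3, 8, 8, dtype=torch.double, requires_grad=True)
w = torch.rand(3, dtype=torch.double) + 0.1       # positive gamma
b = torch.randn(3, dtype=torch.double)
eps = 1e-5

mean = x.mean(dim=(0, 2, 3), keepdim=True)
var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
y = (x - mean) * (var + eps).rsqrt()              # normalized activation
z = y * w.view(1, -1, 1, 1) + b.view(1, -1, 1, 1)

dz = torch.randn_like(z)
z.backward(dz)

# Closed form, as computed by edz_eydz_cpu + backward_cpu
n = x.numel() / x.size(1)
edz = dz.sum(dim=(0, 2, 3), keepdim=True)                  # E[dz] * n
eydz = (y.detach() * dz).sum(dim=(0, 2, 3), keepdim=True)  # E[y*dz] * n
mul = (var.detach() + eps).rsqrt() * w.view(1, -1, 1, 1)
dx = (dz - edz / n - y.detach() * eydz / n) * mul
assert torch.allclose(dx, x.grad)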
face-parse-bisent/modules/src/inplace_abn_cuda.cu ADDED
@@ -0,0 +1,333 @@
+ #include <ATen/ATen.h>
+
+ #include <thrust/device_ptr.h>
+ #include <thrust/transform.h>
+
+ #include <vector>
+
+ #include "utils/checks.h"
+ #include "utils/cuda.cuh"
+ #include "inplace_abn.h"
+
+ #include <ATen/cuda/CUDAContext.h>
+
+ // Operations for reduce
+ template<typename T>
+ struct SumOp {
+   __device__ SumOp(const T *t, int c, int s)
+       : tensor(t), chn(c), sp(s) {}
+   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
+     return tensor[(batch * chn + plane) * sp + n];
+   }
+   const T *tensor;
+   const int chn;
+   const int sp;
+ };
+
+ template<typename T>
+ struct VarOp {
+   __device__ VarOp(T m, const T *t, int c, int s)
+       : mean(m), tensor(t), chn(c), sp(s) {}
+   __device__ __forceinline__ T operator()(int batch, int plane, int n) {
+     T val = tensor[(batch * chn + plane) * sp + n];
+     return (val - mean) * (val - mean);
+   }
+   const T mean;
+   const T *tensor;
+   const int chn;
+   const int sp;
+ };
+
+ template<typename T>
+ struct GradOp {
+   __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
+       : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
+   __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
+     T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
+     T _dz = dz[(batch * chn + plane) * sp + n];
+     return Pair<T>(_dz, _y * _dz);
+   }
+   const T weight;
+   const T bias;
+   const T *z;
+   const T *dz;
+   const int chn;
+   const int sp;
+ };
+
+ /***********
+  * mean_var
+  ***********/
+
+ template<typename T>
+ __global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+   T norm = T(1) / T(num * sp);
+
+   T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
+   __syncthreads();
+   T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;
+
+   if (threadIdx.x == 0) {
+     mean[plane] = _mean;
+     var[plane] = _var;
+   }
+ }
+
+ std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
+   CHECK_CUDA_INPUT(x);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(x, num, chn, sp);
+
+   // Prepare output tensors
+   auto mean = at::empty({chn}, x.options());
+   auto var = at::empty({chn}, x.options());
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
+     mean_var_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+         x.data<scalar_t>(),
+         mean.data<scalar_t>(),
+         var.data<scalar_t>(),
+         num, chn, sp);
+   }));
+
+   return {mean, var};
+ }
+
+ /**********
+  * forward
+  **********/
+
+ template<typename T>
+ __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
+                                bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   T _mean = mean[plane];
+   T _var = var[plane];
+   T _weight = affine ? abs(weight[plane]) + eps : T(1);
+   T _bias = affine ? bias[plane] : T(0);
+
+   T mul = rsqrt(_var + eps) * _weight;
+
+   for (int batch = 0; batch < num; ++batch) {
+     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+       T _x = x[(batch * chn + plane) * sp + n];
+       T _y = (_x - _mean) * mul + _bias;
+
+       x[(batch * chn + plane) * sp + n] = _y;
+     }
+   }
+ }
+
+ at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                         bool affine, float eps) {
+   CHECK_CUDA_INPUT(x);
+   CHECK_CUDA_INPUT(mean);
+   CHECK_CUDA_INPUT(var);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(x, num, chn, sp);
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
+     forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+         x.data<scalar_t>(),
+         mean.data<scalar_t>(),
+         var.data<scalar_t>(),
+         weight.data<scalar_t>(),
+         bias.data<scalar_t>(),
+         affine, eps, num, chn, sp);
+   }));
+
+   return x;
+ }
+
+ /***********
+  * edz_eydz
+  ***********/
+
+ template<typename T>
+ __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
+                                 T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
+   T _bias = affine ? bias[plane] : 0.f;
+
+   Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
+   __syncthreads();
+
+   if (threadIdx.x == 0) {
+     edz[plane] = res.v1;
+     eydz[plane] = res.v2;
+   }
+ }
+
+ std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                       bool affine, float eps) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(z, num, chn, sp);
+
+   auto edz = at::empty({chn}, z.options());
+   auto eydz = at::empty({chn}, z.options());
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
+     edz_eydz_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+         z.data<scalar_t>(),
+         dz.data<scalar_t>(),
+         weight.data<scalar_t>(),
+         bias.data<scalar_t>(),
+         edz.data<scalar_t>(),
+         eydz.data<scalar_t>(),
+         affine, eps, num, chn, sp);
+   }));
+
+   return {edz, eydz};
+ }
+
+ /***********
+  * backward
+  ***********/
+
+ template<typename T>
+ __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
+                                 const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
+   T _bias = affine ? bias[plane] : 0.f;
+   T _var = var[plane];
+   T _edz = edz[plane];
+   T _eydz = eydz[plane];
+
+   T _mul = _weight * rsqrt(_var + eps);
+   T count = T(num * sp);
+
+   for (int batch = 0; batch < num; ++batch) {
+     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+       T _dz = dz[(batch * chn + plane) * sp + n];
+       T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
+
+       dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
+     }
+   }
+ }
+
+ at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                          at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+   CHECK_CUDA_INPUT(var);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+   CHECK_CUDA_INPUT(edz);
+   CHECK_CUDA_INPUT(eydz);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(z, num, chn, sp);
+
+   auto dx = at::zeros_like(z);
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
+     backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+         z.data<scalar_t>(),
+         dz.data<scalar_t>(),
+         var.data<scalar_t>(),
+         weight.data<scalar_t>(),
+         bias.data<scalar_t>(),
+         edz.data<scalar_t>(),
+         eydz.data<scalar_t>(),
+         dx.data<scalar_t>(),
+         affine, eps, num, chn, sp);
+   }));
+
+   return dx;
+ }
+
+ /**************
+  * activations
+  **************/
+
+ template<typename T>
+ inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
+   // Create thrust pointers
+   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
+   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
+
+   auto stream = at::cuda::getCurrentCUDAStream();
+   thrust::transform_if(thrust::cuda::par.on(stream),
+                        th_dz, th_dz + count, th_z, th_dz,
+                        [slope] __device__ (const T& dz) { return dz * slope; },
+                        [] __device__ (const T& z) { return z < 0; });
+   thrust::transform_if(thrust::cuda::par.on(stream),
+                        th_z, th_z + count, th_z,
+                        [slope] __device__ (const T& z) { return z / slope; },
+                        [] __device__ (const T& z) { return z < 0; });
+ }
+
+ void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+
+   int64_t count = z.numel();
+
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
+     leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
+   }));
+ }
+
+ template<typename T>
+ inline void elu_backward_impl(T *z, T *dz, int64_t count) {
+   // Create thrust pointers
+   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
+   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
+
+   auto stream = at::cuda::getCurrentCUDAStream();
+   thrust::transform_if(thrust::cuda::par.on(stream),
+                        th_dz, th_dz + count, th_z, th_z, th_dz,
+                        [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
+                        [] __device__ (const T& z) { return z < 0; });
+   thrust::transform_if(thrust::cuda::par.on(stream),
+                        th_z, th_z + count, th_z,
+                        [] __device__ (const T& z) { return log1p(z); },
+                        [] __device__ (const T& z) { return z < 0; });
+ }
+
+ void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+
+   int64_t count = z.numel();
+
+   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
+     elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
+   }));
+ }
face-parse-bisent/modules/src/inplace_abn_cuda_half.cu ADDED
@@ -0,0 +1,275 @@
+ #include <ATen/ATen.h>
+
+ #include <cuda_fp16.h>
+
+ #include <vector>
+
+ #include "utils/checks.h"
+ #include "utils/cuda.cuh"
+ #include "inplace_abn.h"
+
+ #include <ATen/cuda/CUDAContext.h>
+
+ // Operations for reduce
+ struct SumOpH {
+   __device__ SumOpH(const half *t, int c, int s)
+       : tensor(t), chn(c), sp(s) {}
+   __device__ __forceinline__ float operator()(int batch, int plane, int n) {
+     return __half2float(tensor[(batch * chn + plane) * sp + n]);
+   }
+   const half *tensor;
+   const int chn;
+   const int sp;
+ };
+
+ struct VarOpH {
+   __device__ VarOpH(float m, const half *t, int c, int s)
+       : mean(m), tensor(t), chn(c), sp(s) {}
+   __device__ __forceinline__ float operator()(int batch, int plane, int n) {
+     const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
+     return (t - mean) * (t - mean);
+   }
+   const float mean;
+   const half *tensor;
+   const int chn;
+   const int sp;
+ };
+
+ struct GradOpH {
+   __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
+       : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
+   __device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
+     float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
+     float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
+     return Pair<float>(_dz, _y * _dz);
+   }
+   const float weight;
+   const float bias;
+   const half *z;
+   const half *dz;
+   const int chn;
+   const int sp;
+ };
+
+ /***********
+  * mean_var
+  ***********/
+
+ __global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+   float norm = 1.f / static_cast<float>(num * sp);
+
+   float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
+   __syncthreads();
+   float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;
+
+   if (threadIdx.x == 0) {
+     mean[plane] = _mean;
+     var[plane] = _var;
+   }
+ }
+
+ std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
+   CHECK_CUDA_INPUT(x);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(x, num, chn, sp);
+
+   // Prepare output tensors (statistics are accumulated in fp32)
+   auto mean = at::empty({chn}, x.options().dtype(at::kFloat));
+   auto var = at::empty({chn}, x.options().dtype(at::kFloat));
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   mean_var_kernel_h<<<blocks, threads, 0, stream>>>(
+       reinterpret_cast<half*>(x.data<at::Half>()),
+       mean.data<float>(),
+       var.data<float>(),
+       num, chn, sp);
+
+   return {mean, var};
+ }
+
+ /**********
+  * forward
+  **********/
+
+ __global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
+                                  bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   const float _mean = mean[plane];
+   const float _var = var[plane];
+   const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
+   const float _bias = affine ? bias[plane] : 0.f;
+
+   const float mul = rsqrt(_var + eps) * _weight;
+
+   for (int batch = 0; batch < num; ++batch) {
+     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+       half *x_ptr = x + (batch * chn + plane) * sp + n;
+       float _x = __half2float(*x_ptr);
+       float _y = (_x - _mean) * mul + _bias;
+
+       *x_ptr = __float2half(_y);
+     }
+   }
+ }
+
+ at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                           bool affine, float eps) {
+   CHECK_CUDA_INPUT(x);
+   CHECK_CUDA_INPUT(mean);
+   CHECK_CUDA_INPUT(var);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(x, num, chn, sp);
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   forward_kernel_h<<<blocks, threads, 0, stream>>>(
+       reinterpret_cast<half*>(x.data<at::Half>()),
+       mean.data<float>(),
+       var.data<float>(),
+       weight.data<float>(),
+       bias.data<float>(),
+       affine, eps, num, chn, sp);
+
+   return x;
+ }
+
+ __global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
+                                   float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   float _weight = affine ? abs(weight[plane]) + eps : 1.f;
+   float _bias = affine ? bias[plane] : 0.f;
+
+   Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
+   __syncthreads();
+
+   if (threadIdx.x == 0) {
+     edz[plane] = res.v1;
+     eydz[plane] = res.v2;
+   }
+ }
+
+ std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+                                         bool affine, float eps) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(z, num, chn, sp);
+
+   auto edz = at::empty({chn}, z.options().dtype(at::kFloat));
+   auto eydz = at::empty({chn}, z.options().dtype(at::kFloat));
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   edz_eydz_kernel_h<<<blocks, threads, 0, stream>>>(
+       reinterpret_cast<half*>(z.data<at::Half>()),
+       reinterpret_cast<half*>(dz.data<at::Half>()),
+       weight.data<float>(),
+       bias.data<float>(),
+       edz.data<float>(),
+       eydz.data<float>(),
+       affine, eps, num, chn, sp);
+
+   return {edz, eydz};
+ }
+
+ __global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
+                                   const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
+   int plane = blockIdx.x;
+
+   float _weight = affine ? abs(weight[plane]) + eps : 1.f;
+   float _bias = affine ? bias[plane] : 0.f;
+   float _var = var[plane];
+   float _edz = edz[plane];
+   float _eydz = eydz[plane];
+
+   float _mul = _weight * rsqrt(_var + eps);
+   float count = float(num * sp);
+
+   for (int batch = 0; batch < num; ++batch) {
+     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+       float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
+       float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;
+
+       dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
+     }
+   }
+ }
+
+ at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+                            at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+   CHECK_CUDA_INPUT(var);
+   CHECK_CUDA_INPUT(weight);
+   CHECK_CUDA_INPUT(bias);
+   CHECK_CUDA_INPUT(edz);
+   CHECK_CUDA_INPUT(eydz);
+
+   // Extract dimensions
+   int64_t num, chn, sp;
+   get_dims(z, num, chn, sp);
+
+   auto dx = at::zeros_like(z);
+
+   // Run kernel
+   dim3 blocks(chn);
+   dim3 threads(getNumThreads(sp));
+   auto stream = at::cuda::getCurrentCUDAStream();
+   backward_kernel_h<<<blocks, threads, 0, stream>>>(
+       reinterpret_cast<half*>(z.data<at::Half>()),
+       reinterpret_cast<half*>(dz.data<at::Half>()),
+       var.data<float>(),
+       weight.data<float>(),
+       bias.data<float>(),
+       edz.data<float>(),
+       eydz.data<float>(),
+       reinterpret_cast<half*>(dx.data<at::Half>()),
+       affine, eps, num, chn, sp);
+
+   return dx;
+ }
+
+ __global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
+   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
+     float _z = __half2float(z[i]);
+     if (_z < 0) {
+       dz[i] = __float2half(__half2float(dz[i]) * slope);
+       z[i] = __float2half(_z / slope);
+     }
+   }
+ }
+
+ void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
+   CHECK_CUDA_INPUT(z);
+   CHECK_CUDA_INPUT(dz);
+
+   int64_t count = z.numel();
+   dim3 threads(getNumThreads(count));
+   dim3 blocks((count + threads.x - 1) / threads.x);
+   auto stream = at::cuda::getCurrentCUDAStream();
+   leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
+       reinterpret_cast<half*>(z.data<at::Half>()),
+       reinterpret_cast<half*>(dz.data<at::Half>()),
+       slope, count);
+ }
face-parse-bisent/modules/src/utils/checks.h ADDED
@@ -0,0 +1,15 @@
+ #pragma once
+
+ #include <ATen/ATen.h>
+
+ // Define AT_CHECK for older versions of ATen where the same function was called AT_ASSERT
+ #ifndef AT_CHECK
+ #define AT_CHECK AT_ASSERT
+ #endif
+
+ #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
+ #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
+ #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")
+
+ #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
+ #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
face-parse-bisent/modules/src/utils/common.h ADDED
@@ -0,0 +1,49 @@
+ #pragma once
+
+ #include <ATen/ATen.h>
+
+ /*
+  * Functions to share code between CPU and GPU
+  */
+
+ #ifdef __CUDACC__
+ // CUDA versions
+
+ #define HOST_DEVICE __host__ __device__
+ #define INLINE_HOST_DEVICE __host__ __device__ inline
+ #define FLOOR(x) floor(x)
+
+ #if __CUDA_ARCH__ >= 600
+ // Recent compute capabilities have block-level atomicAdd for all data types, so we use that
+ #define ACCUM(x,y) atomicAdd_block(&(x),(y))
+ #else
+ // Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
+ // and use the known atomicCAS-based implementation for double
+ template<typename data_t>
+ __device__ inline data_t atomic_add(data_t *address, data_t val) {
+   return atomicAdd(address, val);
+ }
+
+ template<>
+ __device__ inline double atomic_add(double *address, double val) {
+   unsigned long long int* address_as_ull = (unsigned long long int*)address;
+   unsigned long long int old = *address_as_ull, assumed;
+   do {
+     assumed = old;
+     old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+   } while (assumed != old);
+   return __longlong_as_double(old);
+ }
+
+ #define ACCUM(x,y) atomic_add(&(x),(y))
+ #endif // #if __CUDA_ARCH__ >= 600
+
+ #else
+ // CPU versions
+
+ #define HOST_DEVICE
+ #define INLINE_HOST_DEVICE inline
+ #define FLOOR(x) std::floor(x)
+ #define ACCUM(x,y) (x) += (y)
+
+ #endif // #ifdef __CUDACC__
face-parse-bisent/modules/src/utils/cuda.cuh ADDED
@@ -0,0 +1,71 @@
+ #pragma once
+
+ /*
+  * General settings and functions
+  */
+ const int WARP_SIZE = 32;
+ const int MAX_BLOCK_SIZE = 1024;
+
+ static int getNumThreads(int nElem) {
+   int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
+   for (int i = 0; i < 6; ++i) {
+     if (nElem <= threadSizes[i]) {
+       return threadSizes[i];
+     }
+   }
+   return MAX_BLOCK_SIZE;
+ }
+
+ /*
+  * Reduction utilities
+  */
+ template <typename T>
+ __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
+                                            unsigned int mask = 0xffffffff) {
+ #if CUDART_VERSION >= 9000
+   return __shfl_xor_sync(mask, value, laneMask, width);
+ #else
+   return __shfl_xor(value, laneMask, width);
+ #endif
+ }
+
+ __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }
+
+ template<typename T>
+ struct Pair {
+   T v1, v2;
+   __device__ Pair() {}
+   __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
+   __device__ Pair(T v) : v1(v), v2(v) {}
+   __device__ Pair(int v) : v1(v), v2(v) {}
+   __device__ Pair &operator+=(const Pair<T> &a) {
+     v1 += a.v1;
+     v2 += a.v2;
+     return *this;
+   }
+ };
+
+ template<typename T>
+ static __device__ __forceinline__ T warpSum(T val) {
+ #if __CUDA_ARCH__ >= 300
+   for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
+     val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
+   }
+ #else
+   __shared__ T values[MAX_BLOCK_SIZE];
+   values[threadIdx.x] = val;
+   __threadfence_block();
+   const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
+   for (int i = 1; i < WARP_SIZE; i++) {
+     val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
+   }
+ #endif
+   return val;
+ }
+
+ template<typename T>
+ static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
+   value.v1 = warpSum(value.v1);
+   value.v2 = warpSum(value.v2);
+   return value;
+ }
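`warpSum` above is a butterfly reduction over XOR lane shuffles: after log2(32) = 5 steps every lane holds the full warp sum. A NumPy simulation of the access pattern (a sketch of the indexing only, not of the hardware shuffle):

import numpy as np

vals = np.random.rand(32)            # one value per lane in a warp
x = vals.copy()
for i in range(5):                   # log2(32) butterfly steps, as in warpSum
    x = x + x[np.arange(32) ^ (1 << i)]
assert np.allclose(x, vals.sum())    # every lane ends up holding the full warp sum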
face-parse-bisent/optimizer.py ADDED
@@ -0,0 +1,69 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+
+ import torch
+ import logging
+
+ logger = logging.getLogger()
+
+ class Optimizer(object):
+     def __init__(self,
+                  model,
+                  lr0,
+                  momentum,
+                  wd,
+                  warmup_steps,
+                  warmup_start_lr,
+                  max_iter,
+                  power,
+                  *args, **kwargs):
+         self.warmup_steps = warmup_steps
+         self.warmup_start_lr = warmup_start_lr
+         self.lr0 = lr0
+         self.lr = self.lr0
+         self.max_iter = float(max_iter)
+         self.power = power
+         self.it = 0
+         wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
+         param_list = [
+             {'params': wd_params},
+             {'params': nowd_params, 'weight_decay': 0},
+             {'params': lr_mul_wd_params, 'lr_mul': True},
+             {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
+         self.optim = torch.optim.SGD(
+             param_list,
+             lr=lr0,
+             momentum=momentum,
+             weight_decay=wd)
+         self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)
+
+     def get_lr(self):
+         if self.it <= self.warmup_steps:
+             lr = self.warmup_start_lr * (self.warmup_factor ** self.it)
+         else:
+             factor = (1 - (self.it - self.warmup_steps) / (self.max_iter - self.warmup_steps)) ** self.power
+             lr = self.lr0 * factor
+         return lr
+
+     def step(self):
+         self.lr = self.get_lr()
+         for pg in self.optim.param_groups:
+             if pg.get('lr_mul', False):
+                 pg['lr'] = self.lr * 10
+             else:
+                 pg['lr'] = self.lr
+         if self.optim.defaults.get('lr_mul', False):
+             self.optim.defaults['lr'] = self.lr * 10
+         else:
+             self.optim.defaults['lr'] = self.lr
+         self.it += 1
+         self.optim.step()
+         if self.it == self.warmup_steps + 2:
+             logger.info('==> warmup done, start to implement poly lr strategy')
+
+     def zero_grad(self):
+         self.optim.zero_grad()
+
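The schedule above warms up geometrically from `warmup_start_lr` to `lr0` over `warmup_steps` iterations (the exponential `warmup_factor` is chosen so the two phases meet exactly at `it = warmup_steps`), then decays polynomially to zero at `max_iter`. A standalone sketch with the hyperparameters used by `train.py` later in this upload:

def lr_at(it, lr0=1e-2, warmup_start_lr=1e-5, warmup_steps=1000, max_iter=80000, power=0.9):
    warmup_factor = (lr0 / warmup_start_lr) ** (1.0 / warmup_steps)
    if it <= warmup_steps:
        return warmup_start_lr * warmup_factor ** it
    return lr0 * (1 - (it - warmup_steps) / float(max_iter - warmup_steps)) ** power

for it in (0, 500, 1000, 40000, 79999):
    print(it, lr_at(it))
# 0 -> 1e-5; 1000 -> 1e-2 (warmup ends at lr0); then poly decay toward 0 at max_iter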
face-parse-bisent/prepropess_data.py ADDED
@@ -0,0 +1,38 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ import os.path as osp
+ import os
+ import cv2
+ import numpy as np
+ from transform import *
+ from PIL import Image
+
+ face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
+ face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
+ mask_path = '/home/zll/data/CelebAMask-HQ/mask'
+ counter = 0
+ total = 0
+
+ # Attribute classes; label 0 is background, labels 1..18 follow this order.
+ atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
+         'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
+
+ for i in range(15):
+     for j in range(i * 2000, (i + 1) * 2000):
+
+         mask = np.zeros((512, 512))
+
+         for l, att in enumerate(atts, 1):
+             total += 1
+             file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
+             path = osp.join(face_sep_mask, str(i), file_name)
+
+             if os.path.exists(path):
+                 counter += 1
+                 sep_mask = np.array(Image.open(path).convert('P'))
+                 # print(np.unique(sep_mask))
+
+                 mask[sep_mask == 225] = l
+         cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
+         print(j)
+
+ print(counter, total)
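Because the parts are enumerated from 1 (0 is left as background), the generated mask PNGs use a fixed id-to-part mapping; a small sketch to recover it:

atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
        'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
id_to_part = {0: 'background', **{i: att for i, att in enumerate(atts, 1)}}
print(id_to_part[1], id_to_part[17])  # skin hair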
face-parse-bisent/resnet.py ADDED
@@ -0,0 +1,109 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch.utils.model_zoo as modelzoo
+
+ # from modules.bn import InPlaceABNSync as BatchNorm2d
+
+ resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
+
+
+ def conv3x3(in_planes, out_planes, stride=1):
+     """3x3 convolution with padding"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                      padding=1, bias=False)
+
+
+ class BasicBlock(nn.Module):
+     def __init__(self, in_chan, out_chan, stride=1):
+         super(BasicBlock, self).__init__()
+         self.conv1 = conv3x3(in_chan, out_chan, stride)
+         self.bn1 = nn.BatchNorm2d(out_chan)
+         self.conv2 = conv3x3(out_chan, out_chan)
+         self.bn2 = nn.BatchNorm2d(out_chan)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = None
+         if in_chan != out_chan or stride != 1:
+             self.downsample = nn.Sequential(
+                 nn.Conv2d(in_chan, out_chan,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(out_chan),
+             )
+
+     def forward(self, x):
+         residual = self.conv1(x)
+         residual = F.relu(self.bn1(residual))
+         residual = self.conv2(residual)
+         residual = self.bn2(residual)
+
+         shortcut = x
+         if self.downsample is not None:
+             shortcut = self.downsample(x)
+
+         out = shortcut + residual
+         out = self.relu(out)
+         return out
+
+
+ def create_layer_basic(in_chan, out_chan, bnum, stride=1):
+     layers = [BasicBlock(in_chan, out_chan, stride=stride)]
+     for i in range(bnum - 1):
+         layers.append(BasicBlock(out_chan, out_chan, stride=1))
+     return nn.Sequential(*layers)
+
+
+ class Resnet18(nn.Module):
+     def __init__(self):
+         super(Resnet18, self).__init__()
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                                bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
+         self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
+         self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
+         self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
+         self.init_weight()
+
+     def forward(self, x):
+         x = self.conv1(x)
+         x = F.relu(self.bn1(x))
+         x = self.maxpool(x)
+
+         x = self.layer1(x)
+         feat8 = self.layer2(x)        # 1/8
+         feat16 = self.layer3(feat8)   # 1/16
+         feat32 = self.layer4(feat16)  # 1/32
+         return feat8, feat16, feat32
+
+     def init_weight(self):
+         state_dict = modelzoo.load_url(resnet18_url)
+         self_state_dict = self.state_dict()
+         for k, v in state_dict.items():
+             if 'fc' in k: continue
+             self_state_dict.update({k: v})
+         self.load_state_dict(self_state_dict)
+
+     def get_params(self):
+         wd_params, nowd_params = [], []
+         for name, module in self.named_modules():
+             if isinstance(module, (nn.Linear, nn.Conv2d)):
+                 wd_params.append(module.weight)
+                 if module.bias is not None:
+                     nowd_params.append(module.bias)
+             elif isinstance(module, nn.BatchNorm2d):
+                 nowd_params += list(module.parameters())
+         return wd_params, nowd_params
+
+
+ if __name__ == "__main__":
+     net = Resnet18()
+     x = torch.randn(16, 3, 224, 224)
+     out = net(x)
+     print(out[0].size())
+     print(out[1].size())
+     print(out[2].size())
+     net.get_params()
face-parse-bisent/resnet18-5c106cde.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c106cde386e87d4033832f2996f5493238eda96ccf559d1d62760c4de0613f8
+ size 46827520
face-parse-bisent/test.py ADDED
@@ -0,0 +1,90 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ from logger import setup_logger
+ from model import BiSeNet
+
+ import torch
+
+ import os
+ import os.path as osp
+ import numpy as np
+ from PIL import Image
+ import torchvision.transforms as transforms
+ import cv2
+
+ def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
+     # Colors for up to 24 parsing classes (only 19 are used by the model)
+     part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
+                    [255, 0, 85], [255, 0, 170],
+                    [0, 255, 0], [85, 255, 0], [170, 255, 0],
+                    [0, 255, 85], [0, 255, 170],
+                    [0, 0, 255], [85, 0, 255], [170, 0, 255],
+                    [0, 85, 255], [0, 170, 255],
+                    [255, 255, 0], [255, 255, 85], [255, 255, 170],
+                    [255, 0, 255], [255, 85, 255], [255, 170, 255],
+                    [0, 255, 255], [85, 255, 255], [170, 255, 255]]
+
+     im = np.array(im)
+     vis_im = im.copy().astype(np.uint8)
+     vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
+     vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
+     vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255
+
+     num_of_class = np.max(vis_parsing_anno)
+
+     for pi in range(1, num_of_class + 1):
+         index = np.where(vis_parsing_anno == pi)
+         vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]
+
+     vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
+     # print(vis_parsing_anno_color.shape, vis_im.shape)
+     vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)
+
+     # Save result or not
+     if save_im:
+         cv2.imwrite(save_path[:-4] + '.png', vis_parsing_anno)
+         cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
+
+     # return vis_im
+
+ def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):
+
+     if not os.path.exists(respth):
+         os.makedirs(respth)
+
+     n_classes = 19
+     net = BiSeNet(n_classes=n_classes)
+     net.cuda()
+     save_pth = osp.join('res/cp', cp)
+     net.load_state_dict(torch.load(save_pth))
+     net.eval()
+
+     to_tensor = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+     ])
+     with torch.no_grad():
+         for image_path in os.listdir(dspth):
+             img = Image.open(osp.join(dspth, image_path))
+             image = img.resize((512, 512), Image.BILINEAR)
+             img = to_tensor(image)
+             img = torch.unsqueeze(img, 0)
+             img = img.cuda()
+             out = net(img)[0]
+             parsing = out.squeeze(0).cpu().numpy().argmax(0)
+             # print(parsing)
+             print(np.unique(parsing))
+
+             vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))
+
+
+ if __name__ == "__main__":
+     evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='79999_iter.pth')
face-parse-bisent/train.py ADDED
@@ -0,0 +1,179 @@
+ #!/usr/bin/python
+ # -*- encoding: utf-8 -*-
+
+ from logger import setup_logger
+ from model import BiSeNet
+ from face_dataset import FaceMask
+ from loss import OhemCELoss
+ from evaluate import evaluate
+ from optimizer import Optimizer
+ import cv2
+ import numpy as np
+
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import DataLoader
+ import torch.nn.functional as F
+ import torch.distributed as dist
+
+ import os
+ import os.path as osp
+ import logging
+ import time
+ import datetime
+ import argparse
+
+
+ respth = './res'
+ if not osp.exists(respth):
+     os.makedirs(respth)
+ logger = logging.getLogger()
+
+
+ def parse_args():
+     parse = argparse.ArgumentParser()
+     parse.add_argument(
+         '--local_rank',
+         dest='local_rank',
+         type=int,
+         default=-1,
+     )
+     return parse.parse_args()
+
+
+ def train():
+     args = parse_args()
+     torch.cuda.set_device(args.local_rank)
+     dist.init_process_group(
+         backend='nccl',
+         init_method='tcp://127.0.0.1:33241',
+         world_size=torch.cuda.device_count(),
+         rank=args.local_rank
+     )
+     setup_logger(respth)
+
+     # dataset
+     n_classes = 19
+     n_img_per_gpu = 16
+     n_workers = 8
+     cropsize = [448, 448]
+     data_root = '/home/zll/data/CelebAMask-HQ/'
+
+     ds = FaceMask(data_root, cropsize=cropsize, mode='train')
+     sampler = torch.utils.data.distributed.DistributedSampler(ds)
+     dl = DataLoader(ds,
+                     batch_size=n_img_per_gpu,
+                     shuffle=False,
+                     sampler=sampler,
+                     num_workers=n_workers,
+                     pin_memory=True,
+                     drop_last=True)
+
+     # model
+     ignore_idx = -100
+     net = BiSeNet(n_classes=n_classes)
+     net.cuda()
+     net.train()
+     net = nn.parallel.DistributedDataParallel(net,
+                                               device_ids=[args.local_rank, ],
+                                               output_device=args.local_rank
+                                               )
+     score_thres = 0.7
+     n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
+     LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+     Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+     Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+
+     ## optimizer
+     momentum = 0.9
+     weight_decay = 5e-4
+     lr_start = 1e-2
+     max_iter = 80000
+     power = 0.9
+     warmup_steps = 1000
+     warmup_start_lr = 1e-5
+     optim = Optimizer(
+         model=net.module,
+         lr0=lr_start,
+         momentum=momentum,
+         wd=weight_decay,
+         warmup_steps=warmup_steps,
+         warmup_start_lr=warmup_start_lr,
+         max_iter=max_iter,
+         power=power)
+
+     ## train loop
+     msg_iter = 50
+     loss_avg = []
+     st = glob_st = time.time()
+     diter = iter(dl)
+     epoch = 0
+     for it in range(max_iter):
+         try:
+             im, lb = next(diter)
+             if not im.size()[0] == n_img_per_gpu:
+                 raise StopIteration
+         except StopIteration:
+             epoch += 1
+             sampler.set_epoch(epoch)
+             diter = iter(dl)
+             im, lb = next(diter)
+         im = im.cuda()
+         lb = lb.cuda()
+         H, W = im.size()[2:]
+         lb = torch.squeeze(lb, 1)
+
+         optim.zero_grad()
+         out, out16, out32 = net(im)
+         lossp = LossP(out, lb)
+         loss2 = Loss2(out16, lb)
+         loss3 = Loss3(out32, lb)
+         loss = lossp + loss2 + loss3
+         loss.backward()
+         optim.step()
+
+         loss_avg.append(loss.item())
+
+         # print training log message
+         if (it + 1) % msg_iter == 0:
+             loss_avg = sum(loss_avg) / len(loss_avg)
+             lr = optim.lr
+             ed = time.time()
+             t_intv, glob_t_intv = ed - st, ed - glob_st
+             eta = int((max_iter - it) * (glob_t_intv / it))
+             eta = str(datetime.timedelta(seconds=eta))
+             msg = ', '.join([
+                 'it: {it}/{max_it}',
+                 'lr: {lr:.4f}',
+                 'loss: {loss:.4f}',
+                 'eta: {eta}',
+                 'time: {time:.4f}',
+             ]).format(
+                 it=it + 1,
+                 max_it=max_iter,
+                 lr=lr,
+                 loss=loss_avg,
+                 time=t_intv,
+                 eta=eta
+             )
+             logger.info(msg)
+             loss_avg = []
+             st = ed
+         # periodic checkpoint + evaluation (rank 0 only)
+         if dist.get_rank() == 0 and (it + 1) % 5000 == 0:
+             state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
+             torch.save(state, './res/cp/{}_iter.pth'.format(it))
+             evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='{}_iter.pth'.format(it))
+
+     # dump the final model
+     save_pth = osp.join(respth, 'model_final_diss.pth')
+     # net.cpu()
+     state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
+     if dist.get_rank() == 0:
+         torch.save(state, save_pth)
+     logger.info('training done, model saved to: {}'.format(save_pth))
+
+
+ if __name__ == "__main__":
+     train()
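A note on running this script: it reads `--local_rank`, the flag `torch.distributed.launch` passes to each worker, and sizes the process group to `torch.cuda.device_count()`, so it is meant to be started with one process per local GPU via that launcher. The `n_min` passed to `OhemCELoss` (defined in `loss.py` from this upload) caps online hard-example mining; assuming the usual OHEM formulation, where at least the `n_min` hardest pixels contribute to each loss term, the arithmetic works out as follows:

n_img_per_gpu, H, W = 16, 448, 448
n_min = n_img_per_gpu * H * W // 16
print(n_min)   # 200704 pixels, i.e. 1/16 of the 3,211,264 pixels in a per-GPU batch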