wanghaofan committed on
Commit
35ed688
1 Parent(s): 55b0e2c

Upload 10 files

Browse files
controlnet_aux/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __version__ = "0.0.9"
2
+
3
+ from .canny import CannyDetector
4
+ from .open_pose import OpenposeDetector
5
+
controlnet_aux/canny/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ from ..util import HWC3, resize_image
6
+
7
class CannyDetector:
    """Canny edge-map preprocessor built on ``cv2.Canny``."""

    def __call__(self, input_image=None, low_threshold=100, high_threshold=200, detect_resolution=512, image_resolution=512, output_type=None, **kwargs):
        """
        Compute a Canny edge map for ``input_image``.

        Args:
            input_image: A ``numpy.ndarray`` or any object ``np.array`` can convert
                to a uint8 array (e.g. a PIL image).
            low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
            high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.
            detect_resolution: Resolution handed to ``resize_image`` before detection.
            image_resolution: Resolution handed to ``resize_image`` to derive the
                output size.
            output_type: ``"pil"`` to get a PIL image back; any other truthy value
                returns the numpy array. When falsy, it defaults to ``"np"`` for
                ndarray input and ``"pil"`` otherwise.
            **kwargs: Only the deprecated ``img`` alias of ``input_image`` is read.

        Returns:
            The edge map, as a PIL image or a numpy array depending on ``output_type``.

        Raises:
            ValueError: If no input image was supplied.
        """
        # Backwards compatibility: `img` was the old name of `input_image`.
        if "img" in kwargs:
            warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
            input_image = kwargs.pop("img")

        if input_image is None:
            raise ValueError("input_image must be defined.")

        # The default output format mirrors the input format.
        given_as_array = isinstance(input_image, np.ndarray)
        if not given_as_array:
            input_image = np.array(input_image, dtype=np.uint8)
        if not output_type:
            output_type = "np" if given_as_array else "pil"

        # HWC3 / resize_image come from ..util; presumably they normalise the
        # channel layout and rescale to the requested resolution -- TODO confirm.
        working = resize_image(HWC3(input_image), detect_resolution)
        edge_map = HWC3(cv2.Canny(working, low_threshold, high_threshold))

        # Only the shape of the image_resolution-sized copy is needed.
        out_h, out_w, _ = resize_image(working, image_resolution).shape
        edge_map = cv2.resize(edge_map, (out_w, out_h), interpolation=cv2.INTER_LINEAR)

        if output_type == "pil":
            return Image.fromarray(edge_map)
        return edge_map
controlnet_aux/open_pose/LICENSE ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENPOSE: MULTIPERSON KEYPOINT DETECTION
2
+ SOFTWARE LICENSE AGREEMENT
3
+ ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
4
+
5
+ BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
6
+
7
+ This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
8
+
9
+ RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
10
+ Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
11
+ non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
12
+
13
+ CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
14
+
15
+ COPYRIGHT: The Software is owned by Licensor and is protected by United
16
+ States copyright laws and applicable international treaties and/or conventions.
17
+
18
+ PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
19
+
20
+ DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
21
+
22
+ BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
23
+
24
+ USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “OpenPose", "Carnegie Mellon" or any renditions thereof without the prior written permission of Licensor.
25
+
26
+ You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
27
+
28
+ ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
29
+
30
+ TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
31
+
32
+ The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
33
+
34
+ FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
35
+
36
+ DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
37
+
38
+ SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
39
+
40
+ EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
41
+
42
+ EXPORT REGULATION: Licensee agrees to comply with any and all applicable
43
+ U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
44
+
45
+ SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
46
+
47
+ NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
48
+
49
+ GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania.
50
+
51
+ ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
52
+
53
+
54
+
55
+ ************************************************************************
56
+
57
+ THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
58
+
59
+ This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
60
+
61
+ 1. Caffe, version 1.0.0, (https://github.com/BVLC/caffe/)
62
+
63
+ COPYRIGHT
64
+
65
+ All contributions by the University of California:
66
+ Copyright (c) 2014-2017 The Regents of the University of California (Regents)
67
+ All rights reserved.
68
+
69
+ All other contributions:
70
+ Copyright (c) 2014-2017, the respective contributors
71
+ All rights reserved.
72
+
73
+ Caffe uses a shared copyright model: each contributor holds copyright over
74
+ their contributions to Caffe. The project versioning records all such
75
+ contribution and copyright details. If a contributor wants to further mark
76
+ their specific copyright on a particular contribution, they should indicate
77
+ their copyright solely in the commit message of the change when it is
78
+ committed.
79
+
80
+ LICENSE
81
+
82
+ Redistribution and use in source and binary forms, with or without
83
+ modification, are permitted provided that the following conditions are met:
84
+
85
+ 1. Redistributions of source code must retain the above copyright notice, this
86
+ list of conditions and the following disclaimer.
87
+ 2. Redistributions in binary form must reproduce the above copyright notice,
88
+ this list of conditions and the following disclaimer in the documentation
89
+ and/or other materials provided with the distribution.
90
+
91
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
92
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
93
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
94
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
95
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
96
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
97
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
98
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
99
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
100
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
101
+
102
+ CONTRIBUTION AGREEMENT
103
+
104
+ By contributing to the BVLC/caffe repository through pull-request, comment,
105
+ or otherwise, the contributor releases their content to the
106
+ license and copyright terms herein.
107
+
108
+ ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
controlnet_aux/open_pose/__init__.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Openpose
2
+ # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3
+ # 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4
+ # 3rd Edited by ControlNet
5
+ # 4th Edited by ControlNet (added face and correct hands)
6
+ # 5th Edited by ControlNet (Improved JSON serialization/deserialization, and lots of bug fixes)
7
+ # This preprocessor is licensed by CMU for non-commercial use only.
8
+
9
+
10
+ import os
11
+
12
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
13
+
14
+ import json
15
+ import warnings
16
+ from typing import Callable, List, NamedTuple, Tuple, Union
17
+
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ from huggingface_hub import hf_hub_download
22
+ from PIL import Image
23
+
24
+ from ..util import HWC3, resize_image
25
+ from . import util
26
+ from .body import Body, BodyResult, Keypoint
27
+ from .face import Face
28
+ from .hand import Hand
29
+
30
+ HandResult = List[Keypoint]
31
+ FaceResult = List[Keypoint]
32
+
33
class PoseResult(NamedTuple):
    """Detection result for one person: the body plus optional hands and face."""

    # Body keypoints together with the total score / total parts of the person.
    body: BodyResult
    # Left-hand keypoints, or None when not available.
    left_hand: Union[HandResult, None]
    # Right-hand keypoints, or None when not available.
    right_hand: Union[HandResult, None]
    # Face keypoints, or None when not available.
    face: Union[FaceResult, None]
38
+
39
def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
    """
    Render a list of poses onto a fresh black canvas.

    Args:
        poses (List[PoseResult]): Poses to draw.
        H (int): Canvas height.
        W (int): Canvas width.
        draw_body (bool, optional): Draw the body keypoints. Defaults to True.
        draw_hand (bool, optional): Draw both hands. Defaults to True.
        draw_face (bool, optional): Draw the face keypoints. Defaults to True.

    Returns:
        numpy.ndarray: The canvas with every requested part drawn. It starts as
        an ``(H, W, 3)`` uint8 array of zeros and is passed through the ``util``
        drawing helpers.
    """
    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)

    for person in poses:
        if draw_body:
            canvas = util.draw_bodypose(canvas, person.body.keypoints)

        if draw_hand:
            # Left hand first, then right, exactly as in the per-hand calls.
            for hand_keypoints in (person.left_hand, person.right_hand):
                canvas = util.draw_handpose(canvas, hand_keypoints)

        if draw_face:
            canvas = util.draw_facepose(canvas, person.face)

    return canvas
68
+
69
+
70
class OpenposeDetector:
    """
    Detect human poses in images using the Openpose models and render them.

    Attributes:
        body_estimation: Body model; called with an image and expected to provide
            ``format_body_result`` and ``to``.
        hand_estimation: Optional hand model (``None`` when not supplied).
        face_estimation: Optional face model (``None`` when not supplied).
    """
    def __init__(self, body_estimation, hand_estimation=None, face_estimation=None):
        self.body_estimation = body_estimation
        self.hand_estimation = hand_estimation
        self.face_estimation = face_estimation

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path, filename=None, hand_filename=None, face_filename=None, cache_dir=None, local_files_only=False):
        """
        Build a detector from a local directory or a Hugging Face Hub repo.

        Args:
            pretrained_model_or_path: Local directory, or Hub repo id.
            filename: Body checkpoint file name (default depends on the repo).
            hand_filename: Hand checkpoint file name (default depends on the repo).
            face_filename: Face checkpoint file name. Defaults to ``"facenet.pth"``.
            cache_dir: Forwarded to ``hf_hub_download``.
            local_files_only: Forwarded to ``hf_hub_download``.

        Returns:
            OpenposeDetector: Detector with body, hand and face models loaded.
        """
        face_filename = face_filename or "facenet.pth"

        if pretrained_model_or_path == "lllyasviel/ControlNet":
            # This repo keeps body/hand checkpoints under annotator/ckpts and the
            # face checkpoint is fetched from a different repo.
            filename = filename or "annotator/ckpts/body_pose_model.pth"
            hand_filename = hand_filename or "annotator/ckpts/hand_pose_model.pth"
            face_pretrained_model_or_path = "lllyasviel/Annotators"
        else:
            filename = filename or "body_pose_model.pth"
            hand_filename = hand_filename or "hand_pose_model.pth"
            face_pretrained_model_or_path = pretrained_model_or_path

        if os.path.isdir(pretrained_model_or_path):
            body_model_path = os.path.join(pretrained_model_or_path, filename)
            hand_model_path = os.path.join(pretrained_model_or_path, hand_filename)
            face_model_path = os.path.join(face_pretrained_model_or_path, face_filename)
        else:
            body_model_path = hf_hub_download(pretrained_model_or_path, filename, cache_dir=cache_dir, local_files_only=local_files_only)
            hand_model_path = hf_hub_download(pretrained_model_or_path, hand_filename, cache_dir=cache_dir, local_files_only=local_files_only)
            face_model_path = hf_hub_download(face_pretrained_model_or_path, face_filename, cache_dir=cache_dir, local_files_only=local_files_only)

        body_estimation = Body(body_model_path)
        hand_estimation = Hand(hand_model_path)
        face_estimation = Face(face_model_path)

        return cls(body_estimation, hand_estimation, face_estimation)

    def to(self, device):
        """
        Move every loaded model to ``device`` and return ``self``.

        The hand and face models are optional in ``__init__``, so they are
        skipped when they are ``None`` instead of raising ``AttributeError``.
        """
        self.body_estimation.to(device)
        for optional_estimator in (self.hand_estimation, self.face_estimation):
            if optional_estimator is not None:
                optional_estimator.to(device)
        return self

    def detect_hands(self, body: BodyResult, oriImg) -> Tuple[Union[HandResult, None], Union[HandResult, None]]:
        """
        Detect the hands of one body.

        Args:
            body: Body result handed to ``util.handDetect`` to locate hand boxes.
            oriImg: Image array indexed as ``[row, column, channel]``.

        Returns:
            ``(left_hand, right_hand)``; each is a list of keypoints with
            coordinates divided by the image width/height, or ``None`` when no
            usable detection was produced for that hand.
        """
        left_hand = None
        right_hand = None
        H, W, _ = oriImg.shape
        for x, y, w, is_left in util.handDetect(body, oriImg):
            peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :]).astype(np.float32)
            if peaks.ndim == 2 and peaks.shape[1] == 2:
                # Near-zero peaks are replaced by -1; the rest are shifted from
                # crop coordinates to image coordinates. Everything is then
                # divided by the image size.
                peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
                peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)

                hand_result = [
                    Keypoint(x=peak[0], y=peak[1])
                    for peak in peaks
                ]

                if is_left:
                    left_hand = hand_result
                else:
                    right_hand = hand_result

        return left_hand, right_hand

    def detect_face(self, body: BodyResult, oriImg) -> Union[FaceResult, None]:
        """
        Detect the face keypoints of one body.

        Args:
            body: Body result handed to ``util.faceDetect`` to locate the face box.
            oriImg: Image array indexed as ``[row, column, channel]``.

        Returns:
            A list of keypoints with coordinates divided by the image
            width/height, or ``None`` when no face box or no usable peaks exist.
        """
        face = util.faceDetect(body, oriImg)
        if face is None:
            return None

        x, y, w = face
        H, W, _ = oriImg.shape
        heatmaps = self.face_estimation(oriImg[y:y+w, x:x+w, :])
        peaks = self.face_estimation.compute_peaks_from_heatmaps(heatmaps).astype(np.float32)
        if peaks.ndim == 2 and peaks.shape[1] == 2:
            # Same conversion as for hands: -1 for near-zero peaks, otherwise
            # crop -> image coordinates, then divide by the image size.
            peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
            peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)
            return [
                Keypoint(x=peak[0], y=peak[1])
                for peak in peaks
            ]

        return None

    def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[PoseResult]:
        """
        Detect poses in the given image.

        Args:
            oriImg (numpy.ndarray): The input image for pose detection.
            include_hand (bool, optional): Whether to include hand detection. Defaults to False.
            include_face (bool, optional): Whether to include face detection. Defaults to False.

        Returns:
            List[PoseResult]: A list of PoseResult objects containing the detected poses.
            Body keypoint coordinates are divided by the image width/height.
        """
        # Reverse the channel axis (presumably RGB -> BGR for the models -- TODO
        # confirm); copy so the result is contiguous and independent of the input.
        oriImg = oriImg[:, :, ::-1].copy()
        H, W, _ = oriImg.shape
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            bodies = self.body_estimation.format_body_result(candidate, subset)

            results = []
            for body in bodies:
                left_hand, right_hand, face = (None,) * 3
                if include_hand:
                    left_hand, right_hand = self.detect_hands(body, oriImg)
                if include_face:
                    face = self.detect_face(body, oriImg)

                results.append(PoseResult(BodyResult(
                    keypoints=[
                        Keypoint(
                            x=keypoint.x / float(W),
                            y=keypoint.y / float(H)
                        ) if keypoint is not None else None
                        for keypoint in body.keypoints
                    ],
                    total_score=body.total_score,
                    total_parts=body.total_parts
                ), left_hand, right_hand, face))

            return results

    def __call__(self, input_image, detect_resolution=512, image_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", **kwargs):
        """
        Detect poses in ``input_image`` and return the rendered pose map.

        Args:
            input_image: A ``numpy.ndarray`` or any object ``np.array`` can convert
                to a uint8 array (e.g. a PIL image).
            detect_resolution: Resolution handed to ``resize_image`` before detection.
            image_resolution: Resolution handed to ``resize_image`` to derive the
                output size.
            include_body: Draw body keypoints.
            include_hand: Detect and draw hands.
            include_face: Detect and draw the face.
            hand_and_face: Deprecated; when not None it overrides both
                ``include_hand`` and ``include_face``.
            output_type: ``"pil"`` for a PIL image, anything else for a numpy array.
                Passing a bool is deprecated.
            **kwargs: Only the deprecated ``return_pil`` flag is read.

        Returns:
            The pose map as a PIL image or a numpy array.
        """
        if hand_and_face is not None:
            warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning)
            include_hand = hand_and_face
            include_face = hand_and_face

        if "return_pil" in kwargs:
            warnings.warn("return_pil is deprecated. Use output_type instead.", DeprecationWarning)
            output_type = "pil" if kwargs["return_pil"] else "np"
        if isinstance(output_type, bool):
            warnings.warn("Passing `True` or `False` to `output_type` is deprecated and will raise an error in future versions")
            # True means PIL output; False yields the numpy array.
            output_type = "pil" if output_type else "np"

        if not isinstance(input_image, np.ndarray):
            input_image = np.array(input_image, dtype=np.uint8)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W, _ = input_image.shape

        poses = self.detect_poses(input_image, include_hand, include_face)
        canvas = draw_poses(poses, H, W, draw_body=include_body, draw_hand=include_hand, draw_face=include_face)

        detected_map = HWC3(canvas)

        # Only the shape of the image_resolution-sized copy is needed.
        img = resize_image(input_image, image_resolution)
        H, W, _ = img.shape

        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

        if output_type == "pil":
            detected_map = Image.fromarray(detected_map)

        return detected_map
controlnet_aux/open_pose/body.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import List, NamedTuple, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from scipy.ndimage.filters import gaussian_filter
8
+
9
+ from . import util
10
+ from .model import bodypose_model
11
+
12
+
13
class Keypoint(NamedTuple):
    """A single detected point with an optional score and identifier."""

    # Horizontal coordinate.
    x: float
    # Vertical coordinate.
    y: float
    # Score of the point; 1.0 when none is supplied.
    score: float = 1.0
    # Identifier of the point; -1 when none is supplied.
    id: int = -1
18
+
19
+
20
class BodyResult(NamedTuple):
    """Body detection of one person: per-part keypoints plus aggregate values."""

    # Note: `Union` is used instead of the `|` operator because the latter is a
    # Python 3.10 feature. Annotator code should stay Python 3.8 compatible, as
    # the controlnet repo uses a Python 3.8 environment.
    # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6

    # One entry per body part; None marks a part with no keypoint.
    keypoints: List[Union[Keypoint, None]]
    # Total score of the person.
    total_score: float
    # Total number of parts of the person.
    total_parts: int
29
+
30
+
31
class Body(object):
    """Body keypoint estimator wrapping ``bodypose_model``."""

    def __init__(self, model_path):
        """
        Load the body pose network from a checkpoint.

        Args:
            model_path: Path of the checkpoint passed to ``torch.load``.
        """
        self.model = bodypose_model()
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, oriImg):
        """
        Run body pose estimation on an image.

        Args:
            oriImg: Image array indexed as ``[row, column, channel]``.

        Returns:
            ``(candidate, subset)``:
            candidate holds one row per detected peak as ``x, y, score, id``;
            subset is an ``n x 20`` array where columns 0-17 index into
            candidate (-1 when missing), column 18 is the total score and
            column 19 the total number of parts.
        """
        device = next(iter(self.model.parameters())).device
        # Other scales that can be searched: [0.5, 1.0, 1.5, 2.0]
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for scale in multiplier:
            imageToTest = util.smart_resize_k(oriImg, fx=scale, fy=scale)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> 1xCxHxW, scaled to roughly [-0.5, 0.5).
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (oriImg.shape[0], oriImg.shape[1]))

            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = util.smart_resize_k(paf, fx=stride, fy=stride)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = util.smart_resize(paf, (oriImg.shape[0], oriImg.shape[1]))

            # Accumulate the per-scale average. Adding the running sum to itself
            # here would double it on every scale after the first.
            heatmap_avg += heatmap / len(multiplier)
            paf_avg += paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            # Shifted copies of the smoothed map, used to find local maxima.
            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            # Peak ids are consecutive across all parts.
            peaks_with_score_and_id = [
                peak + (peak_counter + offset,)
                for offset, peak in enumerate(peaks_with_score)
            ]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        # Avoid division by zero for coincident peaks.
                        norm = max(0.001, norm)
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete some rows of subset which has few parts occur
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

    @staticmethod
    def format_body_result(candidate: np.ndarray, subset: np.ndarray) -> List[BodyResult]:
        """
        Format the body results from the candidate and subset arrays into a list of BodyResult objects.

        Args:
            candidate (np.ndarray): An array of candidates containing the x, y coordinates, score, and id
                for each body part.
            subset (np.ndarray): An array of subsets containing indices to the candidate array for each
                person detected. The last two columns of each row hold the total score and total parts
                of the person.

        Returns:
            List[BodyResult]: A list of BodyResult objects, where each object represents a person with
                detected keypoints, total score, and total parts. A keypoint is None when its
                candidate index is -1.
        """
        return [
            BodyResult(
                keypoints=[
                    Keypoint(
                        x=candidate[candidate_index][0],
                        y=candidate[candidate_index][1],
                        score=candidate[candidate_index][2],
                        id=candidate[candidate_index][3]
                    ) if candidate_index != -1 else None
                    for candidate_index in person[:18].astype(int)
                ],
                total_score=person[18],
                total_parts=person[19]
            )
            for person in subset
        ]
controlnet_aux/open_pose/face.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch.nn import Conv2d, MaxPool2d, Module, ReLU, init
7
+ from torchvision.transforms import ToPILImage, ToTensor
8
+
9
+ from . import util
10
+
11
+
12
class FaceNet(Module):
    """Model the cascading heatmaps.

    A stack of 3x3 convolutions produces a 128-channel feature map. Stage 1
    turns it into 71 heatmap channels; stages 2-6 each take the previous
    stage's 71 channels concatenated with the feature map (199 channels) and
    predict 71 channels again.

    Every convolution is registered under a fixed attribute name
    (``conv1_1`` ... ``Mconv7_stage6``), which determines the ``state_dict``
    keys, and layers are created in a fixed order.
    """

    # ((name, in_channels, out_channels), ...), max-pool afterwards?
    # All of these use kernel_size=3, stride=1, padding=1.
    _FEATURE_GROUPS = (
        ((("conv1_1", 3, 64), ("conv1_2", 64, 64)), True),
        ((("conv2_1", 64, 128), ("conv2_2", 128, 128)), True),
        ((("conv3_1", 128, 256), ("conv3_2", 256, 256),
          ("conv3_3", 256, 256), ("conv3_4", 256, 256)), True),
        ((("conv4_1", 256, 512), ("conv4_2", 512, 512),
          ("conv4_3", 512, 512), ("conv4_4", 512, 512),
          ("conv5_1", 512, 512), ("conv5_2", 512, 512),
          ("conv5_3_CPM", 512, 128)), False),
    )

    # (in_channels, out_channels, kernel_size, padding) for Mconv1..Mconv7
    # of each refinement stage; stride is always 1.
    _STAGE_CONVS = (
        (199, 128, 7, 3),
        (128, 128, 7, 3),
        (128, 128, 7, 3),
        (128, 128, 7, 3),
        (128, 128, 7, 3),
        (128, 128, 1, 0),
        (128, 71, 1, 0),
    )

    _REFINEMENT_STAGES = (2, 3, 4, 5, 6)

    def __init__(self):
        super(FaceNet, self).__init__()
        # cnn to make feature map
        self.relu = ReLU()
        self.max_pooling_2d = MaxPool2d(kernel_size=2, stride=2)
        for convs, _ in self._FEATURE_GROUPS:
            for name, c_in, c_out in convs:
                setattr(self, name, Conv2d(
                    in_channels=c_in, out_channels=c_out,
                    kernel_size=3, stride=1, padding=1))

        # stage1
        self.conv6_1_CPM = Conv2d(
            in_channels=128, out_channels=512, kernel_size=1, stride=1,
            padding=0)
        self.conv6_2_CPM = Conv2d(
            in_channels=512, out_channels=71, kernel_size=1, stride=1,
            padding=0)

        # stage2 .. stage6
        for stage in self._REFINEMENT_STAGES:
            for number, (c_in, c_out, kernel, pad) in enumerate(self._STAGE_CONVS, start=1):
                setattr(self, "Mconv%d_stage%d" % (number, stage), Conv2d(
                    in_channels=c_in, out_channels=c_out,
                    kernel_size=kernel, stride=1, padding=pad))

        # Biases start at zero; weights keep Conv2d's default initialisation.
        for module in self.modules():
            if isinstance(module, Conv2d):
                init.constant_(module.bias, 0)

    def forward(self, x):
        """Return a list of heatmaps, one tensor per stage (six in total)."""
        activate = self.relu

        h = x
        for convs, pool_after in self._FEATURE_GROUPS:
            for name, _, _ in convs:
                h = activate(getattr(self, name)(h))
            if pool_after:
                h = self.max_pooling_2d(h)
        feature_map = h

        # stage1
        h = self.conv6_2_CPM(activate(self.conv6_1_CPM(feature_map)))
        heatmaps = [h]

        # stage2 .. stage6: the last conv of each stage has no activation.
        last = len(self._STAGE_CONVS)
        for stage in self._REFINEMENT_STAGES:
            h = torch.cat([h, feature_map], dim=1)  # channel concat
            for number in range(1, last + 1):
                h = getattr(self, "Mconv%d_stage%d" % (number, stage))(h)
                if number != last:
                    h = activate(h)
            heatmaps.append(h)

        return heatmaps
276
+
277
+
278
LOG = logging.getLogger(__name__)
TOTEN = ToTensor()
TOPIL = ToPILImage()


# Default settings; ``Face.__init__`` falls back to these when an argument is
# not supplied.
params = dict(
    gaussian_sigma=2.5,
    inference_img_size=736,  # 368, 736, 1312
    heatmap_peak_thresh=0.1,
    crop_scale=1.5,
    # Pairs of landmark indices, one chain per group.
    # NOTE(review): presumably the landmarks to join when drawing a face
    # outline; nothing in the visible code reads this entry - confirm.
    line_indices=[
        [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8],
        [8, 9], [9, 10], [10, 11], [11, 12], [12, 13], [13, 14], [14, 15],
        [15, 16],
        [17, 18], [18, 19], [19, 20], [20, 21],
        [22, 23], [23, 24], [24, 25], [25, 26],
        [27, 28], [28, 29], [29, 30],
        [31, 32], [32, 33], [33, 34], [34, 35],
        [36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [41, 36],
        [42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [47, 42],
        [48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54],
        [54, 55], [55, 56], [56, 57], [57, 58], [58, 59], [59, 48],
        [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66],
        [66, 67], [67, 60],
    ],
)
304
+
305
+
306
class Face(object):
    """
    The OpenPose face landmark detector model.

    Args:
        face_model_path: checkpoint file handed to ``torch.load``; it must
            hold a ``FaceNet`` state dict.
        inference_size: set the size of the inference image size, suggested:
            368, 736, 1312, default 736
        gaussian_sigma: blur the heatmaps, default 2.5
        heatmap_peak_thresh: return landmark if over threshold, default 0.1

    Note:
        The three optional settings are stored on the instance, but the
        methods of this class do not read them: ``__call__`` always resizes
        to 384x384 and ``compute_peaks_from_heatmaps`` uses a fixed 0.05
        cut-off.
    """
    def __init__(self, face_model_path,
                 inference_size=None,
                 gaussian_sigma=None,
                 heatmap_peak_thresh=None):
        # Test for None explicitly: ``value or default`` would silently
        # replace a deliberate 0 / 0.0 with the default.
        self.inference_size = (
            params["inference_img_size"] if inference_size is None else inference_size)
        self.sigma = params['gaussian_sigma'] if gaussian_sigma is None else gaussian_sigma
        self.threshold = (
            params["heatmap_peak_thresh"] if heatmap_peak_thresh is None else heatmap_peak_thresh)
        self.model = FaceNet()
        # map_location="cpu" lets a checkpoint saved from a GPU load on a
        # CPU-only host; ``to(device)`` moves the model afterwards.
        self.model.load_state_dict(torch.load(face_model_path, map_location="cpu"))
        self.model.eval()

    def to(self, device):
        """Move the underlying network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, face_img):
        """
        Run the network on one face crop and return its heatmaps.

        Args:
            face_img: H x W x C numpy array (the shape is unpacked into three
                values and the channel axis is moved to the front).

        Returns:
            numpy array of shape (channels, H, W): the last stage's heatmaps,
            bilinearly resized back to the input's height and width.
        """
        device = next(self.model.parameters()).device
        H, W, C = face_img.shape

        w_size = 384
        resized = util.smart_resize(face_img, (w_size, w_size))
        # HWC -> CHW, then shift values by /256 - 0.5 as the network expects.
        x_data = torch.from_numpy(resized).permute([2, 0, 1]) / 256.0 - 0.5
        x_data = x_data.to(device)

        with torch.no_grad():
            hs = self.model(x_data[None, ...])
            heatmaps = F.interpolate(
                hs[-1],
                (H, W),
                mode='bilinear', align_corners=True).cpu().numpy()[0]
        return heatmaps

    def compute_peaks_from_heatmaps(self, heatmaps):
        """
        Pick the strongest location of every heatmap channel.

        Args:
            heatmaps: array indexed as ``heatmaps[part][y, x]``.

        Returns:
            numpy array of ``[x, y]`` pairs. Channels with no value above
            0.05 are skipped, so the result can have fewer rows than
            ``heatmaps`` has channels (and is empty if none qualify).
        """
        all_peaks = []
        for part in range(heatmaps.shape[0]):
            map_ori = heatmaps[part].copy()
            binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8)

            if np.sum(binary) == 0:
                continue

            # Among the above-threshold pixels, keep the one with the largest response.
            positions = np.where(binary > 0.5)
            intensities = map_ori[positions]
            mi = np.argmax(intensities)
            y, x = positions[0][mi], positions[1][mi]
            all_peaks.append([x, y])

        return np.array(all_peaks)
controlnet_aux/open_pose/hand.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from scipy.ndimage.filters import gaussian_filter
5
+ from skimage.measure import label
6
+
7
+ from . import util
8
+ from .model import handpose_model
9
+
10
+
11
class Hand(object):
    """Hand keypoint detector built on ``handpose_model``."""

    def __init__(self, model_path):
        """
        Build the network and load its weights.

        Args:
            model_path: checkpoint file handed to ``torch.load``; its keys are
                remapped to the model's parameter names by ``util.transfer``.
        """
        self.model = handpose_model()
        # map_location="cpu" lets a checkpoint saved from a GPU load on a
        # CPU-only host; ``to(device)`` moves the model afterwards.
        model_dict = util.transfer(self.model, torch.load(model_path, map_location="cpu"))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the underlying network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, oriImgRaw):
        """
        Detect 21 hand keypoints in a hand crop.

        Args:
            oriImgRaw: H x W x C image array (the shape is unpacked into
                three values).

        Returns:
            numpy array with 21 ``[x, y]`` rows in pixel coordinates of
            ``oriImgRaw``; a row is ``[0, 0]`` when that part has no response
            above the threshold.
        """
        device = next(self.model.parameters()).device
        scale_search = [0.5, 1.0, 1.5, 2.0]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize for x in scale_search]

        wsize = 128
        heatmap_avg = np.zeros((wsize, wsize, 22))

        Hr, Wr, Cr = oriImgRaw.shape

        oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8)

        # Run the network at every scale and average the heatmaps on a
        # common wsize x wsize grid.
        for scale in multiplier:
            imageToTest = util.smart_resize(oriImg, (scale, scale))

            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> 1CHW, then shift values by /256 - 0.5.
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)

            with torch.no_grad():
                output = self.model(data).cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (wsize, wsize))

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        # Only the first 21 of the 22 channels are turned into keypoints.
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)

            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            # Keep only the connected region with the largest summed
            # response, then take the maximum inside it.
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            # Map from the wsize grid back to input-image pixels.
            y = int(float(y) * float(Hr) / float(wsize))
            x = int(float(x) * float(Wr) / float(wsize))
            all_peaks.append([x, y])
        return np.array(all_peaks)
80
+
81
if __name__ == "__main__":
    # Manual smoke test. This module uses relative imports, so it has to be
    # started as a module of its package (``python -m ...``), not as a file.
    from types import SimpleNamespace

    hand_estimation = Hand('../model/hand_pose_model.pth')

    test_image = '../images/hand.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    if oriImg is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError(test_image)
    peaks = hand_estimation(oriImg)

    # util.draw_handpose(canvas, keypoints) reads ``.x`` / ``.y`` normalised
    # to 0..1, while ``peaks`` holds pixel coordinates, so convert first.
    height, width = oriImg.shape[:2]
    keypoints = [
        SimpleNamespace(x=px / float(width), y=py / float(height))
        for px, py in peaks
    ]
    canvas = util.draw_handpose(oriImg, keypoints)
    cv2.imshow('', canvas)
    cv2.waitKey(0)
controlnet_aux/open_pose/model.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from collections import OrderedDict
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
def make_layers(block, no_relu_layers):
    """
    Build an ``nn.Sequential`` from an ordered layer specification.

    Args:
        block: mapping of layer name -> list of ints. A name containing
            ``'pool'`` is read as ``[kernel_size, stride, padding]`` for a
            max-pool; any other name is read as
            ``[in_channels, out_channels, kernel_size, stride, padding]``
            for a convolution.
        no_relu_layers: names of convolutions that must NOT be followed by a
            ReLU.

    Returns:
        nn.Sequential whose children keep the names from ``block``; each
        added activation is named ``'relu_' + <conv name>``.
    """
    modules = OrderedDict()
    for layer_name, spec in block.items():
        if 'pool' in layer_name:
            modules[layer_name] = nn.MaxPool2d(
                kernel_size=spec[0], stride=spec[1], padding=spec[2])
            continue

        modules[layer_name] = nn.Conv2d(
            in_channels=spec[0], out_channels=spec[1],
            kernel_size=spec[2], stride=spec[3], padding=spec[4])
        if layer_name not in no_relu_layers:
            modules['relu_' + layer_name] = nn.ReLU(inplace=True)

    return nn.Sequential(modules)
23
+
24
class bodypose_model(nn.Module):
    """
    Six-stage, two-branch body pose network.

    ``model0`` produces a 128-channel feature map. Every stage has an ``L1``
    branch (38 output channels) and an ``L2`` branch (19 output channels);
    stages 2-6 take both previous outputs concatenated with the feature map
    (38 + 19 + 128 = 185 channels).
    NOTE(review): in OpenPose these branches are the part-affinity fields and
    the part heatmaps - not verifiable from this file alone.
    """

    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer: the last conv of each branch in
        # every stage. The list used to name 'Mconv7_stage6_L1' twice and
        # omit 'Mconv7_stage6_L2', which put a ReLU on the final L2 output.
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2']
        for stage in range(2, 7):
            no_relu_layers.append('Mconv7_stage%d_L1' % stage)
            no_relu_layers.append('Mconv7_stage%d_L2' % stage)

        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1]),
        ])

        # (branch number, output channels)
        branches = ((1, 38), (2, 19))

        blocks = OrderedDict()

        # Stage 1
        for branch, out_channels in branches:
            blocks['block1_%d' % branch] = OrderedDict([
                ('conv5_1_CPM_L%d' % branch, [128, 128, 3, 1, 1]),
                ('conv5_2_CPM_L%d' % branch, [128, 128, 3, 1, 1]),
                ('conv5_3_CPM_L%d' % branch, [128, 128, 3, 1, 1]),
                ('conv5_4_CPM_L%d' % branch, [128, 512, 1, 1, 0]),
                ('conv5_5_CPM_L%d' % branch, [512, out_channels, 1, 1, 0]),
            ])

        # Built before the stage blocks so layers are created in the same
        # order as before (keeps seeded initialisation reproducible).
        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            for branch, out_channels in branches:
                blocks['block%d_%d' % (i, branch)] = OrderedDict([
                    ('Mconv1_stage%d_L%d' % (i, branch), [185, 128, 7, 1, 3]),
                    ('Mconv2_stage%d_L%d' % (i, branch), [128, 128, 7, 1, 3]),
                    ('Mconv3_stage%d_L%d' % (i, branch), [128, 128, 7, 1, 3]),
                    ('Mconv4_stage%d_L%d' % (i, branch), [128, 128, 7, 1, 3]),
                    ('Mconv5_stage%d_L%d' % (i, branch), [128, 128, 7, 1, 3]),
                    ('Mconv6_stage%d_L%d' % (i, branch), [128, 128, 1, 1, 0]),
                    ('Mconv7_stage%d_L%d' % (i, branch), [128, out_channels, 1, 1, 0]),
                ])

        built = OrderedDict(
            (name, make_layers(spec, no_relu_layers)) for name, spec in blocks.items())

        # Attribute names fix the state_dict keys; register all L1 branches
        # first, then all L2 branches.
        for branch, _ in branches:
            for stage in range(1, 7):
                setattr(self, 'model%d_%d' % (stage, branch),
                        built['block%d_%d' % (stage, branch)])

    def forward(self, x):
        """
        Run all six stages.

        Returns:
            tuple ``(out6_1, out6_2)``: the stage-6 outputs of the L1 and L2
            branches.
        """
        out1 = self.model0(x)

        stage_input = out1
        for stage in range(1, 7):
            out_l1 = getattr(self, 'model%d_1' % stage)(stage_input)
            out_l2 = getattr(self, 'model%d_2' % stage)(stage_input)
            if stage < 6:
                # The next stage sees both predictions plus the shared features.
                stage_input = torch.cat([out_l1, out_l2, out1], 1)

        return out_l1, out_l2
142
+
143
class handpose_model(nn.Module):
    """
    Six-stage hand pose network.

    ``model1_0`` produces a 128-channel feature map and ``model1_1`` the
    first 22-channel prediction. Stages 2-6 each take the previous
    prediction concatenated with the feature map (22 + 128 = 150 channels)
    and predict 22 channels again.
    """

    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM'] + ['Mconv7_stage%d' % stage for stage in range(2, 7)]

        specs = {}
        # stage 1
        specs['block1_0'] = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1]),
        ])
        specs['block1_1'] = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0]),
        ])

        # stage 2-6
        for stage in range(2, 7):
            specs['block%d' % stage] = OrderedDict([
                ('Mconv1_stage%d' % stage, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % stage, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % stage, [128, 22, 1, 1, 0]),
            ])

        # Layers are instantiated in insertion order of ``specs``.
        built = {name: make_layers(spec, no_relu_layers) for name, spec in specs.items()}

        self.model1_0 = built['block1_0']
        self.model1_1 = built['block1_1']
        for stage in range(2, 7):
            setattr(self, 'model%d' % stage, built['block%d' % stage])

    def forward(self, x):
        """Return the stage-6 prediction (22 channels) for input ``x``."""
        out1_0 = self.model1_0(x)
        prediction = self.model1_1(out1_0)
        for refine in (self.model2, self.model3, self.model4, self.model5, self.model6):
            # Every refinement stage sees the previous prediction plus the features.
            prediction = refine(torch.cat([prediction, out1_0], 1))
        return prediction
controlnet_aux/open_pose/util.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import numpy as np
3
+ import cv2
4
+ from typing import List, Tuple, Union
5
+
6
+ from .body import BodyResult, Keypoint
7
+
8
+ eps = 0.01
9
+
10
+
11
def smart_resize(x, s):
    """
    Resize ``x`` to the target size ``s = (height, width)``.

    Arrays with 1 or 3 channels (or 2-D arrays) go to ``cv2.resize`` in one
    call: ``INTER_AREA`` when the summed target size is smaller than the
    summed source size, ``INTER_LANCZOS4`` otherwise. Any other channel
    count is resized one channel at a time and re-stacked on axis 2.
    """
    Ht, Wt = s
    if x.ndim == 2:
        Ho, Wo = x.shape
        Co = 1
    else:
        Ho, Wo, Co = x.shape

    if Co not in (1, 3):
        per_channel = [smart_resize(x[:, :, i], s) for i in range(Co)]
        return np.stack(per_channel, axis=2)

    k = float(Ht + Wt) / float(Ho + Wo)
    interpolation = cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4
    # cv2 takes the size as (width, height).
    return cv2.resize(x, (int(Wt), int(Ht)), interpolation=interpolation)
23
+
24
+
25
def smart_resize_k(x, fx, fy):
    """
    Resize ``x`` by the factors ``fx`` (width) and ``fy`` (height).

    Arrays with 1 or 3 channels (or 2-D arrays) go to ``cv2.resize`` in one
    call: ``INTER_AREA`` when the summed target size is smaller than the
    summed source size, ``INTER_LANCZOS4`` otherwise. Any other channel
    count is resized one channel at a time and re-stacked on axis 2.
    """
    if x.ndim == 2:
        Ho, Wo = x.shape
        Co = 1
    else:
        Ho, Wo, Co = x.shape
    Ht, Wt = Ho * fy, Wo * fx

    if Co not in (1, 3):
        per_channel = [smart_resize_k(x[:, :, i], fx, fy) for i in range(Co)]
        return np.stack(per_channel, axis=2)

    k = float(Ht + Wt) / float(Ho + Wo)
    interpolation = cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4
    # cv2 takes the size as (width, height).
    return cv2.resize(x, (int(Wt), int(Ht)), interpolation=interpolation)
37
+
38
+
39
def padRightDownCorner(img, stride, padValue):
    """
    Pad ``img`` at the bottom and right so both sides are multiples of ``stride``.

    Args:
        img: H x W x C array.
        stride: the multiple that height and width are rounded up to.
        padValue: value written into the added rows and columns.

    Returns:
        tuple ``(img_padded, pad)`` where ``pad`` is ``[up, left, down, right]``;
        ``up`` and ``left`` are always 0. ``img_padded`` is a new array.
    """
    h = img.shape[0]
    w = img.shape[1]

    # (-n) % stride is 0 for exact multiples and stride - n % stride otherwise.
    pad = [0, 0, (-h) % stride, (-w) % stride]

    # The filler is derived from a slice of the image plus padValue, so the
    # result follows NumPy's usual dtype promotion between the two. The
    # first row/column is used as the shape template: the previous
    # ``[-2:-1]`` slice was empty for a 1-pixel dimension, so no padding
    # was added although ``pad`` reported it.
    pad_down = np.tile(np.zeros_like(img[:1, :, :]) + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img, pad_down), axis=0)
    pad_right = np.tile(np.zeros_like(img_padded[:, :1, :]) + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
60
+
61
+
62
def transfer(model, model_weights):
    """
    Re-key ``model_weights`` so it matches ``model.state_dict()``.

    For every key of the model's state dict, the part before the first
    ``'.'`` is dropped and the remainder is looked up in ``model_weights``.

    Returns:
        dict mapping each state-dict key of ``model`` to the matching entry
        of ``model_weights``.

    Raises:
        KeyError: if a stripped name is missing from ``model_weights``.
    """
    return {
        full_name: model_weights[full_name.partition('.')[2]]
        for full_name in model.state_dict().keys()
    }
67
+
68
+
69
def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
    """
    Draw the limbs and joints of one body on ``canvas``.

    Args:
        canvas (np.ndarray): H x W x C image that is drawn on in place.
        keypoints (List[Keypoint]): Body keypoints; an entry may be ``None`` when the part
            is missing. ``x`` and ``y`` are expected to be normalized between 0 and 1.

    Returns:
        np.ndarray: The same ``canvas`` object, after drawing.
    """
    H, W, _ = canvas.shape
    stickwidth = 4

    # Limb endpoints as 1-based indices into ``keypoints``.
    limbSeq = [
        [2, 3], [2, 6], [3, 4], [4, 5],
        [6, 7], [7, 8], [2, 9], [9, 10],
        [10, 11], [2, 12], [12, 13], [13, 14],
        [2, 1], [1, 15], [15, 17], [1, 16],
        [16, 18],
    ]

    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
        [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
        [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
    ]

    # Limbs: a filled ellipse stretched between the two end joints, in a
    # dimmed version of the limb colour.
    for (start_index, end_index), color in zip(limbSeq, colors):
        start = keypoints[start_index - 1]
        end = keypoints[end_index - 1]
        if start is None or end is None:
            continue

        xs = np.array([start.x, end.x]) * float(W)
        ys = np.array([start.y, end.y]) * float(H)
        center = (int(np.mean(xs)), int(np.mean(ys)))
        length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        polygon = cv2.ellipse2Poly(center, (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        dimmed = [int(float(c) * 0.6) for c in color]
        cv2.fillConvexPoly(canvas, polygon, dimmed)

    # Joints: a filled circle per detected keypoint.
    for keypoint, color in zip(keypoints, colors):
        if keypoint is None:
            continue
        center = (int(keypoint.x * W), int(keypoint.y * H))
        cv2.circle(canvas, center, 4, color, thickness=-1)

    return canvas
124
+
125
+
126
def draw_handpose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw keypoints and connections representing hand pose on a given canvas.

    Args:
        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
                                          or None if no keypoints are present. Individual entries may be None
                                          and are skipped.

    Returns:
        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.

    Note:
        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
        Points whose pixel coordinate is 0 on either axis are not drawn.
    """
    # Imported here so matplotlib is only needed when hands are drawn. The
    # submodule is named explicitly because ``matplotlib.colors`` is used below.
    import matplotlib.colors

    if not keypoints:
        return canvas

    H, W, C = canvas.shape

    # Index pairs into ``keypoints``: four segments per finger, all starting at point 0.
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for ie, (e1, e2) in enumerate(edges):
        k1 = keypoints[e1]
        k2 = keypoints[e2]
        if k1 is None or k2 is None:
            continue

        x1 = int(k1.x * W)
        y1 = int(k1.y * H)
        x2 = int(k2.x * W)
        y2 = int(k2.y * H)
        if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
            # One hue per edge, spread evenly over the colour wheel.
            cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

    for keypoint in keypoints:
        # Same tolerance for missing points as the edge loop above.
        if keypoint is None:
            continue
        x, y = keypoint.x, keypoint.y
        x = int(x * W)
        y = int(y * H)
        if x > eps and y > eps:
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas
170
+
171
+
172
def draw_facepose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw one white dot per face keypoint on ``canvas``.

    Args:
        canvas (np.ndarray): H x W x C image that is drawn on in place.
        keypoints (List[Keypoint]| None): Face keypoints with ``x`` and ``y`` normalized between
                                          0 and 1, or None / empty when there is nothing to draw.

    Returns:
        np.ndarray: The same ``canvas`` object, after drawing.

    Note:
        Points whose pixel coordinate is 0 on either axis are not drawn.
    """
    if not keypoints:
        return canvas

    H, W, _ = canvas.shape
    for keypoint in keypoints:
        px = int(keypoint.x * W)
        py = int(keypoint.y * H)
        if not (px > eps and py > eps):
            continue
        cv2.circle(canvas, (px, py), 3, (255, 255, 255), thickness=-1)
    return canvas
198
+
199
+
200
+ # detect hand according to body pose keypoints
201
+ # please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
202
def handDetect(body: BodyResult, oriImg) -> List[Tuple[int, int, int, bool]]:
    """
    Estimate a square bounding box for each hand from the arm keypoints of a body pose.

    The box is placed following the heuristic of OpenPose's handDetector.cpp:
    its center lies past the wrist along the elbow->wrist direction, and its
    side is derived from the forearm and upper-arm lengths.

    Args:
        body (BodyResult): Body pose whose ``keypoints`` sequence holds the shoulder, elbow and wrist
            at indices 5, 6, 7 (left arm) and 2, 3, 4 (right arm). Entries may be None when undetected.
            Coordinates are compared directly against the image size, so they are expected in pixels.
        oriImg (numpy.ndarray): The original image; only its height and width (``shape[0:2]``) are used.

    Returns:
        List[Tuple[int, int, int, bool]]: One ``(x, y, width, is_left)`` tuple per detected hand, where
            (x, y) is the top-left corner of the box, ``width`` is the side length (width == height since
            the network requires squared input) and ``is_left`` is True for the left hand. The left hand,
            when present, comes first.

    Notes:
        - An arm contributes a box only when all three of its keypoints are present.
        - Boxes whose final side is smaller than 20 pixels are dropped.
    """
    ratioWristElbow = 0.33
    image_height, image_width = oriImg.shape[0:2]
    keypoints = body.keypoints

    # (shoulder, elbow, wrist) keypoint indices per arm; left arm is handled first.
    arms = (
        ((5, 6, 7), True),
        ((2, 3, 4), False),
    )

    detect_result = []
    for (shoulder_idx, elbow_idx, wrist_idx), is_left in arms:
        shoulder = keypoints[shoulder_idx]
        elbow = keypoints[elbow_idx]
        wrist = keypoints[wrist_idx]
        if shoulder is None or elbow is None or wrist is None:
            continue

        # Hand center: pos_wrist + ratio * (pos_wrist - pos_elbow).
        center_x = wrist.x + ratioWristElbow * (wrist.x - elbow.x)
        center_y = wrist.y + ratioWristElbow * (wrist.y - elbow.y)

        # Side length: 1.5 * max(|wrist - elbow|, 0.9 * |elbow - shoulder|).
        distanceWristElbow = math.sqrt((wrist.x - elbow.x) ** 2 + (wrist.y - elbow.y) ** 2)
        distanceElbowShoulder = math.sqrt((elbow.x - shoulder.x) ** 2 + (elbow.y - shoulder.y) ** 2)
        side = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)

        # Move from the center to the top-left corner (width == height).
        left = center_x - side / 2
        top = center_y - side / 2

        # Keep the corner inside the image.
        if left < 0:
            left = 0
        if top < 0:
            top = 0

        # Shrink the side when the box would run past the right/bottom border.
        room_x = image_width - left if left + side > image_width else side
        room_y = image_height - top if top + side > image_height else side
        side = min(room_x, room_y)

        # Boxes below the minimum size of 20 pixels are discarded.
        if side >= 20:
            detect_result.append((int(left), int(top), int(side), is_left))

    return detect_result
293
+
294
+
295
+ # Written by Lvmin
296
def faceDetect(body: BodyResult, oriImg) -> Union[Tuple[int, int, int], None]:
    """
    Detect the face in the input body pose keypoints and calculate the bounding box for the face.

    The box is centered on keypoint 0 (the head point). Its half-size is the largest of
    3.0 x the Chebyshev distance from the head point to each available eye and
    1.5 x the Chebyshev distance to each available ear. The box is then moved/shrunk so
    that it stays inside the image.

    Args:
        body (BodyResult): A BodyResult object containing the detected body pose keypoints. The head point
            is read from index 0, the eyes from indices 14 and 15 and the ears from indices 16 and 17;
            entries may be None when undetected. Coordinates are compared directly against the image
            size, so they are expected in pixels.
        oriImg (numpy.ndarray): The original input image; only ``shape[0:2]`` (height, width) is used.

    Returns:
        Tuple[int, int, int] | None: A tuple containing the coordinates (x, y) of the top-left corner of the
                                     bounding box and the width (height) of the bounding box, or None if the
                                     face is not detected or the bounding box width is less than 20 pixels.

    Notes:
        - The width and height of the bounding box are equal.
        - The minimum bounding box size is 20 pixels.
        - The returned box never extends past the right or bottom image border.
    """
    image_height, image_width = oriImg.shape[0:2]

    keypoints = body.keypoints
    head = keypoints[0]
    left_eye = keypoints[14]
    right_eye = keypoints[15]
    left_ear = keypoints[16]
    right_ear = keypoints[17]

    # A face needs the head point plus at least one eye or ear to size the box.
    if head is None or all(keypoint is None for keypoint in (left_eye, right_eye, left_ear, right_ear)):
        return None

    x0, y0 = head.x, head.y

    # Half of the box side: eyes sit close to the head point, so they are
    # weighted more heavily (3.0) than the ears (1.5).
    half_width = 0.0
    for keypoint, scale in ((left_eye, 3.0), (right_eye, 3.0), (left_ear, 1.5), (right_ear, 1.5)):
        if keypoint is None:
            continue
        d = max(abs(x0 - keypoint.x), abs(y0 - keypoint.y))
        half_width = max(half_width, d * scale)

    # Top-left corner, clamped into the image.
    x = x0 - half_width
    y = y0 - half_width
    if x < 0:
        x = 0
    if y < 0:
        y = 0

    # The full side is twice the half-size. Clamp it against the room that is
    # left to the right of / below the corner. (Previously the overflow test
    # used only the half-size, so a box near the right or bottom edge could
    # extend past the image.)
    width = min(half_width * 2, image_width - x, image_height - y)

    if width >= 20:
        return int(x), int(y), int(width)
    return None
375
+
376
+
377
+ # get max index of 2d array
378
def npmax(array):
    """Return the (row, column) index of the largest entry of a 2D array.

    The row is the first one whose row-maximum is largest; the column is the
    first position of that maximum within the row.
    """
    row_maxima = array.max(1)
    best_row = row_maxima.argmax()
    best_col = array.argmax(1)[best_row]
    return best_row, best_col
controlnet_aux/util.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+
8
+ annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')
9
+
10
+
11
def HWC3(x):
    """Convert a uint8 image to a 3-channel height x width x channel array.

    - 2D input is treated as a single-channel image.
    - 3-channel input is returned unchanged (the same array object).
    - 1-channel input is replicated across three channels.
    - 4-channel input is alpha-composited over a white background, using the
      fourth channel as alpha.

    The dtype, rank and channel count are checked with assertions.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    _, _, channels = x.shape
    assert channels in (1, 3, 4)

    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x] * 3, axis=2)
    if channels == 4:
        rgb = x[:, :, 0:3].astype(np.float32)
        opacity = x[:, :, 3:4].astype(np.float32) / 255.0
        # Blend the color over white (255) according to the alpha channel.
        blended = rgb * opacity + 255.0 * (1.0 - opacity)
        return blended.clip(0, 255).astype(np.uint8)
28
+
29
+
30
def make_noise_disk(H, W, C, F):
    """Generate an H x W x C array of smooth random noise scaled to [0, 1].

    Uniform noise is sampled on a coarse grid of ``(H // F) + 2`` by
    ``(W // F) + 2`` cells, upsampled with bicubic interpolation to
    ``(H + 2F) x (W + 2F)``, and cropped by F pixels at the top/left to the
    requested size. The crop is shifted so its minimum is 0 and then divided
    by its maximum.
    """
    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    coarse = np.random.uniform(low=0, high=1, size=coarse_shape)
    upsampled = cv2.resize(coarse, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC)
    noise = upsampled[F: F + H, F: F + W]
    noise -= np.min(noise)
    noise /= np.max(noise)
    if C == 1:
        # Add an explicit channel axis for the single-channel case.
        noise = noise[:, :, None]
    return noise
39
+
40
+
41
def nms(x, t, s):
    """Thin a response map with directional non-maximum suppression.

    The input is converted to float32 and blurred with a Gaussian of sigma
    ``s``. A pixel is kept when it equals the maximum of its 3-pixel line
    neighborhood in at least one of four directions (horizontal, vertical and
    the two diagonals). Kept pixels whose blurred value exceeds ``t`` are set
    to 255 in the returned uint8 map; everything else is 0.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # 3x3 line-shaped structuring elements, one per direction.
    line_patterns = (
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
        [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
    )
    kernels = [np.array(pattern, dtype=np.uint8) for pattern in line_patterns]

    local_maxima = np.zeros_like(blurred)
    for kernel in kernels:
        # Dilation yields the directional maximum; equality marks a local peak.
        np.putmask(local_maxima, cv2.dilate(blurred, kernel=kernel) == blurred, blurred)

    edges = np.zeros_like(local_maxima, dtype=np.uint8)
    edges[local_maxima > t] = 255
    return edges
57
+
58
def min_max_norm(x):
    """Rescale ``x`` in place to start at 0 and span at most [0, 1].

    The minimum is subtracted and the result is divided by its maximum, with
    the divisor floored at 1e-5 so a constant input maps to zeros instead of
    dividing by zero. The input array itself is modified and returned.
    """
    lowest = np.min(x)
    x -= lowest
    divisor = np.maximum(np.max(x), 1e-5)
    x /= divisor
    return x
62
+
63
+
64
def safe_step(x, step=2):
    """Quantize ``x`` into discrete levels spaced ``1 / step`` apart.

    The input is cast to float32, multiplied by ``step + 1``, truncated
    through an int32 cast, and divided by ``step``. A new float32 array is
    returned; the input is not modified.
    """
    scaled = x.astype(np.float32) * float(step + 1)
    truncated = scaled.astype(np.int32)
    return truncated.astype(np.float32) / float(step)
68
+
69
+
70
def img2mask(img, H, W, low=10, high=90):
    """Derive a random boolean H x W mask from a uint8 image.

    One channel is picked at random (a 2D image is used as is), resized to
    ``(W, H)`` with bicubic interpolation and, with probability 0.5, inverted.
    The mask is True where the result lies below a percentile drawn at random
    from ``range(low, high)``.
    """
    assert img.ndim == 3 or img.ndim == 2
    assert img.dtype == np.uint8

    # Choose a single channel for multi-channel input.
    y = img[:, :, random.randrange(0, img.shape[2])] if img.ndim == 3 else img
    y = cv2.resize(y, (W, H), interpolation=cv2.INTER_CUBIC)

    # Randomly invert the intensities.
    if random.uniform(0, 1) < 0.5:
        y = 255 - y

    threshold = np.percentile(y, random.randrange(low, high))
    return y < threshold
85
+
86
+
87
def resize_image(input_image, resolution):
    """Resize an H x W x C image so its shorter side is about ``resolution``.

    The aspect ratio is kept up to rounding: both target dimensions are
    rounded to the nearest multiple of 64. Lanczos interpolation is used when
    upscaling and area interpolation otherwise.
    """
    src_h, src_w, _ = input_image.shape
    src_h = float(src_h)
    src_w = float(src_w)

    # Scale factor that maps the shorter side onto the requested resolution.
    k = float(resolution) / min(src_h, src_w)

    out_h = int(np.round(src_h * k / 64.0)) * 64
    out_w = int(np.round(src_w * k / 64.0)) * 64

    method = cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (out_w, out_h), interpolation=method)
98
+
99
+
100
def torch_gc():
    """Release cached CUDA memory; does nothing when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
104
+
105
+
106
def ade_palette():
    """ADE20K palette that maps each class to RGB values.

    Returns a freshly built list of 150 ``[R, G, B]`` triples (values 0-255);
    the position of a triple in the list is the class index it colors.
    """
    return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
            [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
            [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
            [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
            [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
            [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
            [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
            [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
            [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
            [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
            [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
            [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
            [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
            [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
            [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
            [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
            [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
            [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
            [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
            [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
            [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
            [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
            [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
            [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
            [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
            [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
            [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
            [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
            [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
            [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
            [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
            [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
            [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
            [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
            [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
            [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
            [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
            [102, 255, 0], [92, 0, 255]]
146
+