Hieu Ngoc Giap committed
Commit
2b07837
1 Parent(s): ecf1cc2

Upload folder using huggingface_hub

pose/config/config.yaml ADDED
@@ -0,0 +1,29 @@
+ # -------------- Data -------------
+ data_root_path: ./data/raw
+ train_mask_data_path: ./data/raw/mask/train2014/mask_COCO_train2014_
+ val_mask_data_path: ./data/raw/mask/val2014/mask_COCO_val2014_
+ label_file: label.json
+ label_subset_file: label_subset.json
+
+ # -------------- Model -------------
+ model_weight_path: ./src/weights/pose_model_scratch.pth
+
+ # -------------- Logging ----------
+ logging_file: ./logs/logging_file.log
+
+ # ------------- Hyperparameters ------------
+ hyperparameters:
+   train_batch_size: 8
+   val_batch_size: 8
+   lr: 0.001
+   betas: [0.9, 0.999]
+   weight_decay: 0.0001
+   epochs: 20
+
+ # -------------- DVC remote ---------------
+ dvc_remote_name: gcs-storage
+ dvc_remote_url: gs://human-pose-data-bucket/data
+
+ # -------------- MLflow --------------
+ experiment_name: openpose-human-pose-training
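
For orientation, here is a minimal sketch of how the `hyperparameters` block could feed a PyTorch optimizer (not part of the commit; the training script is not included here, and `model` is a placeholder for the real OpenPose network):

    import torch

    from config import cfg  # module-level cfg built in pose/config/load_cfg.py below

    model = torch.nn.Linear(2, 2)  # placeholder for the real OpenPose network

    hp = cfg.hyperparameters  # nested YAML blocks stay plain dicts
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=hp["lr"],                      # 0.001
        betas=tuple(hp["betas"]),         # (0.9, 0.999)
        weight_decay=hp["weight_decay"],  # 0.0001
    )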
pose/config/load_cfg.py ADDED
@@ -0,0 +1,41 @@
+ from pathlib import Path
+ from typing import Optional
+
+ import yaml
+
+ ROOT = Path(__file__).resolve().parent.parent
+ CONFIG_FILE_PATH = ROOT / "config" / "config.yaml"
+
+
+ class DictDotNotation(dict):
+     """A dict whose keys are also accessible as attributes (cfg.key)."""
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.__dict__ = self
+
+
+ def _find_config_file() -> Path:
+     """Locate the configuration file."""
+     if CONFIG_FILE_PATH.is_file():
+         return CONFIG_FILE_PATH
+     raise FileNotFoundError(f"Config file not found at {CONFIG_FILE_PATH}")
+
+
+ def load_config_file(cfg_path: Optional[Path] = None) -> dict:
+     """Parse the YAML config, falling back to the default location."""
+     if not cfg_path:
+         cfg_path = _find_config_file()
+
+     with open(cfg_path, "r") as f:
+         yaml_data = yaml.safe_load(f)
+     if not yaml_data:
+         raise ValueError("Invalid or empty YAML configuration")
+     return yaml_data
+
+
+ def configure() -> DictDotNotation:
+     return DictDotNotation(load_config_file())
+
+
+ cfg = configure()
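
A quick usage sketch (not part of the commit), assuming `cfg` is importable via the `config` package as done in pose/logs/__init__.py below:

    from config import cfg

    print(cfg.model_weight_path)          # "./src/weights/pose_model_scratch.pth"
    print(cfg.hyperparameters["epochs"])  # 20

Note that DictDotNotation only wraps the top level of the parsed YAML, so nested blocks such as `hyperparameters` keep dict-style access.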
pose/logs/__init__.py ADDED
@@ -0,0 +1,32 @@
+ from typing import TypeVar
+
+ import loguru
+
+ from config import cfg
+
+ log_level = "DEBUG"
+ log_format = (
+     "<green>{time:YYYY-MM-DD HH:mm:ss.SSS zz}</green> | "
+     "<level>{level: <8}</level> | "
+     "<yellow>Line {line: >4} ({file}):</yellow> <b>{message}</b>"
+ )
+
+ _T_loguru_logger = TypeVar("_T_loguru_logger", bound=loguru._logger.Logger)
+
+
+ def logger_handler(
+     use_log_file: bool = True, file: str = "./logs/logging_file.log"
+ ) -> _T_loguru_logger:
+     """Return the global loguru logger, optionally adding a file sink."""
+     if use_log_file:
+         loguru.logger.add(
+             file,
+             level=log_level,
+             format=log_format,
+             colorize=False,  # no ANSI color codes in the log file
+             backtrace=True,  # extended tracebacks beyond the catch point
+             diagnose=True,  # annotate tracebacks with variable values
+         )
+     return loguru.logger
+
+
+ log = logger_handler(file=cfg.logging_file)
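
Usage is then a one-line import anywhere in the package (a sketch, not part of the commit). Importing `log` adds the file sink once at module load, so messages go both to stderr (loguru's default sink) and to the file configured in config.yaml:

    from logs import log

    log.info("Training started")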
pose/poetry.lock ADDED
The diff for this file is too large to render.
 
pose/pyproject.toml ADDED
@@ -0,0 +1,48 @@
+ [tool.poetry]
+ name = "human-pose-estimation-development"
+ version = "0.1.0"
+ description = "Development stage of human pose estimation system"
+ authors = ["To Duc Thanh"]
+ license = "MIT"
+ readme = "README.md"
+ package-mode = false
+
+ [tool.poetry.dependencies]
+ python = ">=3.9,<3.9.7"
+ pyyaml = "^6.0.1"
+ torch = "^2.2.2"
+ torchvision = "^0.17.2"
+ opencv-python = "^4.9.0.80"
+ tqdm = "^4.66.2"
+ loguru = "^0.7.2"
+ matplotlib = "^3.8.4"
+ python-dotenv = "^1.0.1"
+ mlflow = "^2.11.3"
+ pynvml = "^11.5.0"
+ dvc = {extras = ["gdrive", "gs"], version = "^3.48.4"}
+ scipy = "^1.13.0"
+ minio = "^7.2.5"
+
+ [tool.poetry.group.dev.dependencies]
+ isort = "^5.13.2"
+ pytest = "^8.1.1"
+ pre-commit = "^3.7.0"
+ jupyterlab = "^4.1.5"
+ ruff = "^0.3.5"
+
+ [tool.poetry.group.deploy.dependencies]
+ torch = "^2.2.2"
+ torchvision = "^0.17.2"
+ opencv-python = "^4.9.0.80"
+ scipy = "^1.13.0"
+ loguru = "^0.7.2"
+ matplotlib = "^3.8.4"
+ fastapi = "^0.110.1"
+ uvicorn = {extras = ["standard"], version = "^0.29.0"}
+ python-multipart = "^0.0.9"
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
pose/src/inference/__init__.py ADDED
File without changes
pose/src/inference/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (207 Bytes).
 
pose/src/inference/__pycache__/base.cpython-39.pyc ADDED
Binary file (2.15 kB).
 
pose/src/inference/__pycache__/decode.cpython-39.pyc ADDED
Binary file (12.5 kB).
 
pose/src/inference/__pycache__/pose_inference.cpython-39.pyc ADDED
Binary file (3.9 kB).
 
pose/src/inference/base.py ADDED
@@ -0,0 +1,57 @@
+ from abc import (
+     ABC,
+     abstractmethod,
+ )
+ from typing import Union
+
+ import numpy as np
+ import torch
+
+
+ class PoseInferenceBase(ABC):
+     @abstractmethod
+     def preprocess(
+         self, img: Union[np.ndarray, str], *args, **kwargs
+     ) -> Union[torch.Tensor, np.ndarray]:
+         """
+         Preprocesses the input image before inference.
+
+         Args:
+             img (Union[np.ndarray, str]): The input image as a NumPy array or a path to the image file.
+
+         Returns:
+             Union[torch.Tensor, np.ndarray]: The preprocessed image tensor or array ready for inference.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def process(
+         self, img: Union[np.ndarray, str], *args, **kwargs
+     ) -> Union[torch.Tensor, np.ndarray]:
+         """
+         Performs inference on the input image.
+
+         Args:
+             img (Union[np.ndarray, str]): The input image as a NumPy array or a path to the image file.
+
+         Returns:
+             Union[torch.Tensor, np.ndarray]: The output of the inference process.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def postprocess(
+         self, oriImg: np.ndarray, heatmaps: np.ndarray, pafs: np.ndarray, *args, **kwargs
+     ) -> Union[torch.Tensor, np.ndarray]:
+         """
+         Postprocesses the inference results.
+
+         Args:
+             oriImg (np.ndarray): The original input image.
+             heatmaps (np.ndarray): The heatmaps generated by the inference.
+             pafs (np.ndarray): The Part Affinity Fields (PAFs) generated by the inference.
+
+         Returns:
+             Union[torch.Tensor, np.ndarray]: The postprocessed results.
+         """
+         raise NotImplementedError
pose/src/inference/decode.py ADDED
@@ -0,0 +1,537 @@
+ import math
+
+ from typing import (
+     List,
+     Tuple,
+ )
+
+ import cv2
+ import matplotlib
+ import numpy as np
+
+ # The scipy.ndimage.filters and scipy.ndimage.morphology namespaces are
+ # deprecated (and removed in newer scipy); with the scipy version pinned in
+ # pyproject.toml these functions live at the top level of scipy.ndimage.
+ from scipy.ndimage import (
+     gaussian_filter,
+     generate_binary_structure,
+     maximum_filter,
+ )
+
+ # It is better to use 0.1 as the threshold for evaluation, but 0.3 for demo
+ # purposes.
+ # matplotlib.cm.get_cmap() was deprecated in matplotlib 3.7 and removed in
+ # 3.9, so use the colormap registry instead.
+ cmap = matplotlib.colormaps["hsv"]
+
+ # Heatmap indices to find each limb (joint connection). Eg: limb_type=1 is
+ # Neck->LShoulder, so joint_to_limb_heatmap_relationship[1] represents the
+ # indices of heatmaps to look for joints: neck=1, LShoulder=5
+ joint_to_limb_heatmap_relationship = [
+     [1, 2],
+     [1, 5],
+     [2, 3],
+     [3, 4],
+     [5, 6],
+     [6, 7],
+     [1, 8],
+     [8, 9],
+     [9, 10],
+     [1, 11],
+     [11, 12],
+     [12, 13],
+     [1, 0],
+     [0, 14],
+     [14, 16],
+     [0, 15],
+     [15, 17],
+     [2, 16],
+     [5, 17],
+ ]
+
+ # PAF indices containing the x and y coordinates of the PAF for a given limb.
+ # Eg: limb_type=1 is Neck->LShoulder, so
+ # PAFneckLShoulder_x=paf_xy_coords_per_limb[1][0] and
+ # PAFneckLShoulder_y=paf_xy_coords_per_limb[1][1]
+ paf_xy_coords_per_limb = [
+     [12, 13],
+     [20, 21],
+     [14, 15],
+     [16, 17],
+     [22, 23],
+     [24, 25],
+     [0, 1],
+     [2, 3],
+     [4, 5],
+     [6, 7],
+     [8, 9],
+     [10, 11],
+     [28, 29],
+     [30, 31],
+     [34, 35],
+     [32, 33],
+     [36, 37],
+     [18, 19],
+     [26, 27],
+ ]
+
+ # Color code used to plot different joints and limbs (eg: joint_type=3 and
+ # limb_type=3 will use colors[3])
+ colors = [
+     [255, 0, 0],
+     [255, 85, 0],
+     [255, 170, 0],
+     [255, 255, 0],
+     [170, 255, 0],
+     [85, 255, 0],
+     [0, 255, 0],
+     [0, 255, 85],
+     [0, 255, 170],
+     [0, 255, 255],
+     [0, 170, 255],
+     [0, 85, 255],
+     [0, 0, 255],
+     [85, 0, 255],
+     [170, 0, 255],
+     [255, 0, 255],
+     [255, 0, 170],
+     [255, 0, 85],
+     [255, 0, 0],
+ ]
+
+ NUM_JOINTS = 18
+ NUM_LIMBS = len(joint_to_limb_heatmap_relationship)
+
+
+ def find_peaks(param: dict, img: np.ndarray) -> np.ndarray:
+     """
+     Finds local maxima in a (grayscale) image whose values are above a given threshold.
+
+     Args:
+         param (dict): A dictionary containing parameters.
+             - 'thre1' (float): Threshold for peak detection.
+         img (np.ndarray): Input grayscale image (2D array) where peaks are to be found.
+
+     Returns:
+         np.ndarray: A 2D array containing the [x, y] coordinates of each peak found in the image.
+     """
+     peaks_binary = (maximum_filter(img, footprint=generate_binary_structure(2, 1)) == img) * (
+         img > param["thre1"]
+     )
+     # Note reverse ([::-1]): we return [[x y], [x y]...] instead of [[y x], [y x]...]
+     return np.array(np.nonzero(peaks_binary)[::-1]).T
+
+
+ def compute_resized_coords(coords: Tuple[float, float], resizeFactor: float) -> np.ndarray:
+     """
+     Computes the new coordinates of a cell in an array after resizing the array.
+
+     Args:
+         coords (Tuple[float, float]): Coordinates (indices) of a cell in some input array.
+         resizeFactor (float): Resize coefficient, indicating how much bigger the destination array is compared to the original one.
+
+     Returns:
+         np.ndarray: Coordinates in the resized array that correspond to the same pixel center as 'coords'
+             in the original array.
+     """
+
+     # 1) Add 0.5 to coords to get coordinates of center of the pixel (e.g.
+     # index [0,0] represents the pixel at location [0.5,0.5])
+     # 2) Transform those coordinates to shape_dest, by multiplying by resizeFactor
+     # 3) That number represents the location of the pixel center in the new array,
+     # so subtract 0.5 to get coordinates of the array index/indices (revert step 1)
+     return (np.array(coords, dtype=float) + 0.5) * resizeFactor - 0.5
+
+
+ def NMS(
+     param: dict,
+     heatmaps: np.ndarray,
+     upsampFactor: float = 1.0,
+     bool_refine_center: bool = True,
+     bool_gaussian_filt: bool = False,
+ ) -> List[np.ndarray]:
+     """
+     Performs Non-Maxima Suppression (NMS) to find peaks (local maxima) in a set of grayscale images.
+
+     Args:
+         param (dict): Additional parameters for NMS (see find_peaks()).
+         heatmaps (np.ndarray): Set of grayscale images on which to find local maxima. A 3D numpy array with dimensions
+             image_height x image_width x num_heatmaps.
+         upsampFactor (float): Size ratio between CPM (Convolutional Pose Machine) heatmap output and the input image size.
+             For example, upsampFactor=16 if the original image was 480x640 and heatmaps are 30x40xN.
+         bool_refine_center (bool): Flag indicating whether to refine the center of the peak. Defaults to True.
+             If True, the function upsamples a small patch around each low-res peak and fine-tunes the location of the peak
+             at the resolution of the original input image. If False, simply returns the low-res peak found upscaled by upsampFactor.
+         bool_gaussian_filt (bool): Flag indicating whether to apply a 1D Gaussian filter (smoothing) to each upsampled patch
+             before fine-tuning the location of each peak. Defaults to False.
+
+     Returns:
+         List[np.ndarray]: One array per joint type (0=nose, 1=neck...). Each array has one row per peak found,
+             with 4 columns indicating the {x,y} position, the score (probability), and a unique id (counter).
+     """
+
+     joint_list_per_joint_type = []
+     cnt_total_joints = 0
+
+     # For every peak found, win_size specifies how many pixels in each
+     # direction from the peak we take to obtain the patch that will be
+     # upsampled. Eg: win_size=1 -> patch is 3x3; win_size=2 -> 5x5
+     # (for BICUBIC interpolation to be accurate, win_size needs to be >=2!)
+     win_size = 2
+
+     for joint in range(NUM_JOINTS):
+         map_orig = heatmaps[:, :, joint]
+         peak_coords = find_peaks(param, map_orig)
+         peaks = np.zeros((len(peak_coords), 4))
+         for i, peak in enumerate(peak_coords):
+             if bool_refine_center:
+                 x_min, y_min = np.maximum(0, peak - win_size)
+                 x_max, y_max = np.minimum(np.array(map_orig.T.shape) - 1, peak + win_size)
+
+                 # Take a small patch around each peak and only upsample that tiny region
+                 patch = map_orig[y_min : y_max + 1, x_min : x_max + 1]
+                 map_upsamp = cv2.resize(
+                     patch, None, fx=upsampFactor, fy=upsampFactor, interpolation=cv2.INTER_CUBIC
+                 )
+
+                 # Gaussian filtering takes an average of 0.8ms/peak (and there might be
+                 # more than one peak per joint!) -> For now, skip it (it's
+                 # accurate enough)
+                 map_upsamp = (
+                     gaussian_filter(map_upsamp, sigma=3) if bool_gaussian_filt else map_upsamp
+                 )
+
+                 # Obtain the coordinates of the maximum value in the patch
+                 location_of_max = np.unravel_index(map_upsamp.argmax(), map_upsamp.shape)
+                 # Remember that peaks indicates [x,y] -> need to reverse it for
+                 # [y,x]
+                 location_of_patch_center = compute_resized_coords(
+                     peak[::-1] - [y_min, x_min], upsampFactor
+                 )
+                 # Calculate the offset wrt to the patch center where the actual
+                 # maximum is
+                 refined_center = location_of_max - location_of_patch_center
+                 peak_score = map_upsamp[location_of_max]
+             else:
+                 refined_center = [0, 0]
+                 # Flip peak coordinates since they are [x,y] instead of [y,x]
+                 peak_score = map_orig[tuple(peak[::-1])]
+             peaks[i, :] = tuple(
+                 [
+                     int(round(x))
+                     for x in compute_resized_coords(peak_coords[i], upsampFactor)
+                     + refined_center[::-1]
+                 ]
+             ) + (peak_score, cnt_total_joints)
+             cnt_total_joints += 1
+         joint_list_per_joint_type.append(peaks)
+
+     return joint_list_per_joint_type
+
+
+ def find_connected_joints(
+     param: dict,
+     paf_upsamp: np.ndarray,
+     joint_list_per_joint_type: List[np.ndarray],
+     num_intermed_pts: int = 10,
+ ) -> List[np.ndarray]:
+     """
+     For every type of limb (e.g., forearm, shin, etc.), looks for every potential
+     pair of joints (e.g., every wrist-elbow combination) and evaluates the PAFs to
+     determine which pairs are indeed body limbs.
+
+     Args:
+         param (dict): Parameters; 'thre2' is the minimum PAF score for an intermediate point to count.
+         paf_upsamp (np.ndarray): PAFs upsampled to the original input image resolution.
+         joint_list_per_joint_type (List[np.ndarray]): List of joint lists per joint type. See the 'return' doc of NMS().
+         num_intermed_pts (int): Number of intermediate points to take between joint_src and joint_dst, at which
+             the PAFs will be evaluated. Defaults to 10.
+
+     Returns:
+         List[np.ndarray]: List of NUM_LIMBS rows. For every limb_type (a row) we store a list of all limbs of that type found
+             (e.g., all the right forearms). For each limb (each item in connected_limbs[limb_type]), we store 5 cells:
+             {joint_src_id, joint_dst_id}: a unique number associated with each joint,
+             limb_score_penalizing_long_dist: a score of how good a connection of the joints is, penalized if the limb length is too long,
+             {joint_src_index, joint_dst_index}: the index of the joint within all the joints of that type found
+             (e.g., the 3rd right elbow found)
+     """
+     connected_limbs = []
+
+     # Auxiliary array to access paf_upsamp quickly
+     limb_intermed_coords = np.empty((4, num_intermed_pts), dtype=np.intp)
+     for limb_type in range(NUM_LIMBS):
+         # List of all joints of type A found, where A is specified by limb_type
+         # (eg: a right forearm starts in a right elbow)
+         joints_src = joint_list_per_joint_type[joint_to_limb_heatmap_relationship[limb_type][0]]
+         # List of all joints of type B found, where B is specified by limb_type
+         # (eg: a right forearm ends in a right wrist)
+         joints_dst = joint_list_per_joint_type[joint_to_limb_heatmap_relationship[limb_type][1]]
+         if len(joints_src) == 0 or len(joints_dst) == 0:
+             # No limbs of this type found (eg: no right forearms found because
+             # we didn't find any right wrists or right elbows)
+             connected_limbs.append([])
+         else:
+             connection_candidates = []
+             # Specify the paf index that contains the x-coord of the paf for
+             # this limb
+             limb_intermed_coords[2, :] = paf_xy_coords_per_limb[limb_type][0]
+             # And the y-coord paf index
+             limb_intermed_coords[3, :] = paf_xy_coords_per_limb[limb_type][1]
+             for i, joint_src in enumerate(joints_src):
+                 # Try every possible joints_src[i]-joints_dst[j] pair and see
+                 # if it's a feasible limb
+                 for j, joint_dst in enumerate(joints_dst):
+                     # Subtract the position of both joints to obtain the
+                     # direction of the potential limb
+                     limb_dir = joint_dst[:2] - joint_src[:2]
+                     # Compute the distance/length of the potential limb (norm
+                     # of limb_dir)
+                     limb_dist = np.sqrt(np.sum(limb_dir**2)) + 1e-8
+                     limb_dir = limb_dir / limb_dist  # Normalize limb_dir to be a unit vector
+
+                     # Linearly distribute num_intermed_pts points from the x
+                     # coordinate of joint_src to the x coordinate of joint_dst
+                     limb_intermed_coords[1, :] = np.round(
+                         np.linspace(joint_src[0], joint_dst[0], num=num_intermed_pts)
+                     )
+                     limb_intermed_coords[0, :] = np.round(
+                         np.linspace(joint_src[1], joint_dst[1], num=num_intermed_pts)
+                     )  # Same for the y coordinate
+                     intermed_paf = paf_upsamp[
+                         limb_intermed_coords[0, :],
+                         limb_intermed_coords[1, :],
+                         limb_intermed_coords[2:4, :],
+                     ].T
+
+                     score_intermed_pts = intermed_paf.dot(limb_dir)
+                     score_penalizing_long_dist = score_intermed_pts.mean() + min(
+                         0.5 * paf_upsamp.shape[0] / limb_dist - 1, 0
+                     )
+                     # Criterion 1: At least 80% of the intermediate points have
+                     # a score higher than thre2
+                     criterion1 = (
+                         np.count_nonzero(score_intermed_pts > param["thre2"])
+                         > 0.8 * num_intermed_pts
+                     )
+                     # Criterion 2: Mean score, penalized for large limb
+                     # distances (larger than half the image height), is
+                     # positive
+                     criterion2 = score_penalizing_long_dist > 0
+                     if criterion1 and criterion2:
+                         # Last value is the combined paf(+limb_dist) + heatmap
+                         # scores of both joints
+                         connection_candidates.append(
+                             [
+                                 i,
+                                 j,
+                                 score_penalizing_long_dist,
+                                 score_penalizing_long_dist + joint_src[2] + joint_dst[2],
+                             ]
+                         )
+
+             # Sort connection candidates based on their
+             # score_penalizing_long_dist
+             connection_candidates = sorted(connection_candidates, key=lambda x: x[2], reverse=True)
+             connections = np.empty((0, 5))
+             # There can only be as many limbs as the smallest number of source
+             # or destination joints (eg: only 2 forearms if there's 5 wrists
+             # but 2 elbows)
+             max_connections = min(len(joints_src), len(joints_dst))
+             # Traverse all potential joint connections (sorted by their score)
+             for potential_connection in connection_candidates:
+                 i, j, s = potential_connection[0:3]
+                 # Make sure joints_src[i] or joints_dst[j] haven't already been
+                 # connected to other joints_dst or joints_src
+                 if i not in connections[:, 3] and j not in connections[:, 4]:
+                     # [joint_src_id, joint_dst_id, limb_score_penalizing_long_dist, joint_src_index, joint_dst_index]
+                     connections = np.vstack(
+                         [connections, [joints_src[i][3], joints_dst[j][3], s, i, j]]
+                     )
+                     # Exit if we've already established max_connections
+                     # connections (each joint can't be connected to more than
+                     # one joint)
+                     if len(connections) >= max_connections:
+                         break
+             connected_limbs.append(connections)
+
+     return connected_limbs
+
+
+ def group_limbs_of_same_person(
+     connected_limbs: List[np.ndarray], joint_list: np.ndarray
+ ) -> np.ndarray:
+     """
+     Associate limbs belonging to the same person together.
+
+     Args:
+         connected_limbs (List[np.ndarray]): List of connected limbs.
+             See the 'return' doc of find_connected_joints().
+         joint_list (np.ndarray): Unraveled version of joint_list_per_joint.
+             See the 'return' doc of NMS().
+
+     Returns:
+         np.ndarray: A 2D array of size num_people x (NUM_JOINTS+2). For each person found:
+             - First NUM_JOINTS columns contain the index (in joint_list) of the joints associated with that person
+               (or -1 if their i-th joint wasn't found).
+             - 2nd-to-last column: Overall score of the joints+limbs that belong to this person.
+             - Last column: Total count of joints connected for this person.
+     """
+     person_to_joint_assoc = []
+
+     for limb_type in range(NUM_LIMBS):
+         joint_src_type, joint_dst_type = joint_to_limb_heatmap_relationship[limb_type]
+
+         for limb_info in connected_limbs[limb_type]:
+             person_assoc_idx = []
+             for person, person_limbs in enumerate(person_to_joint_assoc):
+                 if (
+                     person_limbs[joint_src_type] == limb_info[0]
+                     or person_limbs[joint_dst_type] == limb_info[1]
+                 ):
+                     person_assoc_idx.append(person)
+
+             # If one of the joints has been associated to a person, and either
+             # the other joint is also associated with the same person or not
+             # associated to anyone yet:
+             if len(person_assoc_idx) == 1:
+                 person_limbs = person_to_joint_assoc[person_assoc_idx[0]]
+                 # If the other joint is not associated to anyone yet,
+                 if person_limbs[joint_dst_type] != limb_info[1]:
+                     # Associate it with the current person
+                     person_limbs[joint_dst_type] = limb_info[1]
+                     # Increase the number of limbs associated to this person
+                     person_limbs[-1] += 1
+                     # And update the total score (+= heatmap score of joint_dst
+                     # + score of connecting joint_src with joint_dst)
+                     person_limbs[-2] += joint_list[limb_info[1].astype(int), 2] + limb_info[2]
+             elif len(person_assoc_idx) == 2:  # if found 2 and disjoint, merge them
+                 person1_limbs = person_to_joint_assoc[person_assoc_idx[0]]
+                 person2_limbs = person_to_joint_assoc[person_assoc_idx[1]]
+                 membership = ((person1_limbs >= 0) & (person2_limbs >= 0))[:-2]
+                 if (
+                     not membership.any()
+                 ):  # If both people have no same joints connected, merge them into a single person
+                     # Update which joints are connected
+                     person1_limbs[:-2] += person2_limbs[:-2] + 1
+                     # Update the overall score and total count of joints
+                     # connected by summing their counters
+                     person1_limbs[-2:] += person2_limbs[-2:]
+                     # Add the score of the current joint connection to the
+                     # overall score
+                     person1_limbs[-2] += limb_info[2]
+                     person_to_joint_assoc.pop(person_assoc_idx[1])
+                 else:  # Same case as len(person_assoc_idx)==1 above
+                     person1_limbs[joint_dst_type] = limb_info[1]
+                     person1_limbs[-1] += 1
+                     person1_limbs[-2] += joint_list[limb_info[1].astype(int), 2] + limb_info[2]
+             else:  # No person has claimed any of these joints, create a new person
+                 # Initialize person info to all -1 (no joint associations)
+                 row = -1 * np.ones(20)
+                 # Store the joint info of the new connection
+                 row[joint_src_type] = limb_info[0]
+                 row[joint_dst_type] = limb_info[1]
+                 # Total count of connected joints for this person: 2
+                 row[-1] = 2
+                 # Compute overall score: score joint_src + score joint_dst + score connection
+                 # {joint_src,joint_dst}
+                 row[-2] = sum(joint_list[limb_info[:2].astype(int), 2]) + limb_info[2]
+                 person_to_joint_assoc.append(row)
+
+     # Delete people who have very few parts connected
+     people_to_delete = []
+     for person_id, person_info in enumerate(person_to_joint_assoc):
+         if person_info[-1] < 3 or person_info[-2] / person_info[-1] < 0.2:
+             people_to_delete.append(person_id)
+     # Traverse the list in reverse order so we delete indices starting from the
+     # last one (otherwise, removing item for example 0 would modify the indices of
+     # the remaining people to be deleted!)
+     for index in people_to_delete[::-1]:
+         person_to_joint_assoc.pop(index)
+
+     # Appending items to a np.array can be very costly (allocating new memory, copying over the array, then adding new row)
+     # Instead, we treat the set of people as a list (fast to append items) and
+     # only convert to np.array at the end
+     return np.array(person_to_joint_assoc)
+
+
+ def plot_pose(
+     img_orig: np.ndarray,
+     joint_list: np.ndarray,
+     person_to_joint_assoc: np.ndarray,
+     bool_fast_plot: bool = True,
+     plot_ear_to_shoulder: bool = False,
+ ) -> Tuple[np.ndarray, np.ndarray]:
+     canvas = img_orig.copy()  # Make a copy so we don't modify the original image
+
+     # to_plot is the location of all joints found overlaid on top of the
+     # original image
+     to_plot = canvas.copy() if bool_fast_plot else cv2.addWeighted(img_orig, 0.3, canvas, 0.7, 0)
+
+     limb_thickness = 4
+     # Last 2 limbs connect ears with shoulders and this looks very weird.
+     # Disabled by default to be consistent with original rtpose output
+     which_limbs_to_plot = NUM_LIMBS if plot_ear_to_shoulder else NUM_LIMBS - 2
+     for limb_type in range(which_limbs_to_plot):
+         for person_joint_info in person_to_joint_assoc:
+             joint_indices = person_joint_info[
+                 joint_to_limb_heatmap_relationship[limb_type]
+             ].astype(int)
+             if -1 in joint_indices:
+                 # Only draw actual limbs (connected joints), skip if not
+                 # connected
+                 continue
+             # joint_coords[:,0] represents Y coords of both joints;
+             # joint_coords[:,1], X coords
+             joint_coords = joint_list[joint_indices, 0:2]
+
+             for joint in joint_coords:
+                 cv2.circle(canvas, tuple(joint[0:2].astype(int)), 2, (255, 255, 255), thickness=-1)
+             # mean along the axis=0 computes meanYcoord and meanXcoord -> Round
+             # and make int to avoid errors
+             coords_center = tuple(np.round(np.mean(joint_coords, 0)).astype(int))
+             # joint_coords[0,:] is the coords of joint_src; joint_coords[1,:]
+             # is the coords of joint_dst
+             limb_dir = joint_coords[0, :] - joint_coords[1, :]
+             limb_length = np.linalg.norm(limb_dir)
+             # Get the angle of limb_dir in degrees using atan2(limb_dir_x, limb_dir_y)
+             angle = math.degrees(math.atan2(limb_dir[1], limb_dir[0]))
+
+             # For faster plotting, just plot over canvas instead of constantly
+             # copying it
+             cur_canvas = canvas if bool_fast_plot else canvas.copy()
+             polygon = cv2.ellipse2Poly(
+                 coords_center, (int(limb_length / 2), limb_thickness), int(angle), 0, 360, 1
+             )
+             cv2.fillConvexPoly(cur_canvas, polygon, colors[limb_type])
+             if not bool_fast_plot:
+                 canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
+
+     return to_plot, canvas
+
+
+ def decode_pose(
+     img_orig: np.ndarray, heatmaps: np.ndarray, pafs: np.ndarray
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+     param = {"thre1": 0.1, "thre2": 0.05, "thre3": 0.5}
+
+     # Bottom-up approach:
+     # Step 1: find all joints in the image (organized by joint type: [0]=nose, [1]=neck...)
+     joint_list_per_joint_type = NMS(param, heatmaps, img_orig.shape[0] / float(heatmaps.shape[0]))
+     # joint_list is an unravel'd version of joint_list_per_joint, where we add
+     # a 5th column to indicate the joint_type (0=nose, 1=neck...)
+     joint_list = np.array(
+         [
+             tuple(peak) + (joint_type,)
+             for joint_type, joint_peaks in enumerate(joint_list_per_joint_type)
+             for peak in joint_peaks
+         ]
+     )
+
+     # Step 2: find which joints go together to form limbs (which wrists go with which elbows)
+     paf_upsamp = cv2.resize(
+         pafs,
+         (img_orig.shape[1], img_orig.shape[0]),
+         interpolation=cv2.INTER_CUBIC,
+     )
+     connected_limbs = find_connected_joints(param, paf_upsamp, joint_list_per_joint_type)
+
+     # Step 3: associate limbs that belong to the same person
+     person_to_joint_assoc = group_limbs_of_same_person(connected_limbs, joint_list)
+
+     # Step 4: plot results
+     to_plot, canvas = plot_pose(img_orig, joint_list, person_to_joint_assoc)
+
+     return to_plot, canvas, joint_list, person_to_joint_assoc
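
A worked example for `compute_resized_coords` (not part of the commit): array indices are treated as pixel centers, so with an upsampling factor of 8, heatmap index [3, 2] maps to (3 + 0.5) * 8 - 0.5 = 27.5 and (2 + 0.5) * 8 - 0.5 = 19.5 in the upsampled image, rather than to the naive [24, 16]:

    from src.inference.decode import compute_resized_coords  # assumes pose/ as the package root

    print(compute_resized_coords([3, 2], 8))  # [27.5 19.5]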
pose/src/inference/pose_inference.py ADDED
@@ -0,0 +1,132 @@
+ from typing import (
+     Optional,
+     Tuple,
+     Union,
+ )
+
+ import cv2
+ import numpy as np
+ import torch
+
+ from loguru import logger
+
+ from src.models.networks import OpenPoseNet
+
+ from .base import PoseInferenceBase
+ from .decode import decode_pose
+
+
+ class PoseInference(PoseInferenceBase):
+     def __init__(self, model_weight_path: str, device: Optional[str] = None):
+         super().__init__()
+         self.net = OpenPoseNet()
+         if device:
+             self.device = torch.device(device)
+         else:
+             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+         net_weights = torch.load(model_weight_path, map_location=self.device)
+
+         # The checkpoint keys may not match the network's module names exactly,
+         # so map them positionally onto the network's state_dict keys.
+         weights_load = {
+             model_key: net_weights[ckpt_key]
+             for model_key, ckpt_key in zip(self.net.state_dict().keys(), net_weights.keys())
+         }
+
+         state = self.net.state_dict()
+         state.update(weights_load)
+         self.net.load_state_dict(state)
+         self.net.to(self.device)
+         self.net.eval()
+
+         logger.info(f"Loaded model successfully on device '{self.device}' for inference")
+
+     def preprocess(
+         self,
+         img: Union[np.ndarray, str],
+         size: Tuple[int, int] = (368, 368),
+         color_mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
+         color_std: Tuple[float, float, float] = (0.229, 0.224, 0.225),
+     ) -> Tuple[torch.Tensor, np.ndarray]:
+         if isinstance(img, str):
+             original_img = cv2.imread(img)
+         elif isinstance(img, np.ndarray):
+             original_img = img
+         else:
+             raise ValueError("'img' parameter must be of type string or numpy array")
+
+         original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
+         img = cv2.resize(original_img, size, interpolation=cv2.INTER_CUBIC)
+         img = img.astype(np.float32) / 255.0
+
+         preprocessed_img = img.copy()
+
+         # Normalize each channel with the ImageNet mean and std
+         for i in range(3):
+             preprocessed_img[:, :, i] = preprocessed_img[:, :, i] - color_mean[i]
+             preprocessed_img[:, :, i] = preprocessed_img[:, :, i] / color_std[i]
+
+         # HWC -> CHW, then add the batch dimension
+         img = preprocessed_img.transpose((2, 0, 1)).astype(np.float32)
+         img = torch.from_numpy(img)
+         img = img.unsqueeze(0)
+
+         return img, original_img
+
+     def process(
+         self,
+         img: Union[np.ndarray, str],
+         size: Tuple[int, int] = (368, 368),
+         color_mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
+         color_std: Tuple[float, float, float] = (0.229, 0.224, 0.225),
+     ) -> np.ndarray:
+         preprocessed_img, original_img = self.preprocess(
+             img=img, size=size, color_mean=color_mean, color_std=color_std
+         )
+         # Run model
+         with torch.no_grad():
+             predicted_outputs, _ = self.net(preprocessed_img.to(self.device))
+
+         shape = original_img.shape
+         heatmaps = PoseInference._generate_heatmap(predicted_outputs, size, shape)
+         pafs = PoseInference._generate_part_affinity_fields(predicted_outputs, size, shape)
+
+         result_img = self.postprocess(original_img, heatmaps, pafs)
+         result_img = cv2.cvtColor(result_img, cv2.COLOR_RGB2BGR)
+         return result_img
+
+     def postprocess(
+         self,
+         oriImg: np.ndarray,
+         heatmaps: np.ndarray,
+         pafs: np.ndarray,
+     ) -> np.ndarray:
+         _, result_img, _, _ = decode_pose(oriImg, heatmaps, pafs)
+         return result_img
+
+     @staticmethod
+     def _generate_heatmap(
+         predicted_outputs: torch.Tensor,
+         size: Tuple[int, int],
+         oriImg_shape: Tuple[int, ...],
+     ) -> np.ndarray:
+         # Resize the network output back up to the original image resolution
+         _heatmaps = predicted_outputs[1][0].detach().cpu().numpy().transpose(1, 2, 0)
+         _heatmaps = cv2.resize(_heatmaps, size, interpolation=cv2.INTER_CUBIC)
+         _heatmaps = cv2.resize(
+             _heatmaps,
+             (oriImg_shape[1], oriImg_shape[0]),
+             interpolation=cv2.INTER_CUBIC,
+         )
+         logger.info("Generating heatmaps ...")
+         return _heatmaps
+
+     @staticmethod
+     def _generate_part_affinity_fields(
+         predicted_outputs: torch.Tensor,
+         size: Tuple[int, int],
+         oriImg_shape: Tuple[int, ...],
+     ) -> np.ndarray:
+         _pafs = predicted_outputs[0][0].detach().cpu().numpy().transpose(1, 2, 0)
+         _pafs = cv2.resize(_pafs, size, interpolation=cv2.INTER_CUBIC)
+         _pafs = cv2.resize(
+             _pafs,
+             (oriImg_shape[1], oriImg_shape[0]),
+             interpolation=cv2.INTER_CUBIC,
+         )
+         logger.info("Generating part affinity fields ...")
+         return _pafs
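
A minimal end-to-end sketch (not part of the commit), assuming it is run from the pose/ directory so that the `config` and `src` packages resolve, and that `person.jpg` is a placeholder for any test image:

    import cv2

    from config import cfg
    from src.inference.pose_inference import PoseInference

    inferencer = PoseInference(model_weight_path=cfg.model_weight_path)
    result = inferencer.process("person.jpg")  # BGR image with the skeleton drawn
    cv2.imwrite("pose_result.jpg", result)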
pose/src/weights/pose_model_scratch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb202d924c7c2b3b3943c879a34fdc539f0e29648764df920bc50d21681272fb
+ size 209282651