use mask_rcnn as detector
Files changed:
- app.py +0 -2
- lib/dataset/TestDataset.py +2 -9
- lib/pymaf/utils/imutils.py +16 -16
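
In short: the human_det package is dropped as a dependency, and process_image() now runs torchvision's pretrained Mask R-CNN to find the person bounding box, so callers no longer pass a detector object.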
app.py
CHANGED
@@ -18,8 +18,6 @@ if os.getenv('SYSTEM') == 'spaces':
         'pip install https://download.is.tue.mpg.de/icon/HF/kaolin-0.11.0-cp38-cp38-linux_x86_64.whl'.split())
     subprocess.run(
         'pip install https://download.is.tue.mpg.de/icon/HF/pytorch3d-0.7.0-cp38-cp38-linux_x86_64.whl'.split())
-    subprocess.run(
-        'pip install git+https://github.com/Project-Splinter/human_det.git'.split())
     subprocess.run(
         'pip install git+https://github.com/YuliangXiu/neural_voxelization_layer.git'.split())
 
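Since the replacement detector comes from torchvision, which is already available alongside the Space's PyTorch install, the two pip-install lines for Project-Splinter/human_det can simply go.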
lib/dataset/TestDataset.py
CHANGED
@@ -30,7 +30,6 @@ import os.path as osp
 import torch
 import numpy as np
 import random
-import human_det
 from termcolor import colored
 from PIL import ImageFile
 from huggingface_hub import cached_download
@@ -52,12 +51,6 @@ class TestDataset():
 
         self.device = device
 
-        if self.has_det:
-            self.det = human_det.Detection()
-        else:
-            self.det = None
-
-
         self.subject_list = [self.image_path]
 
         # smpl related
@@ -155,7 +148,7 @@ class TestDataset():
 
         if self.seg_dir is None:
             img_icon, img_hps, img_ori, img_mask, uncrop_param = process_image(
-                img_path, self.det, self.hps_type, 512, self.device)
+                img_path, self.hps_type, 512, self.device)
 
             data_dict = {
                 'name': img_name,
@@ -167,7 +160,7 @@ class TestDataset():
 
         else:
             img_icon, img_hps, img_ori, img_mask, uncrop_param, segmentations = process_image(
-                img_path, self.det, self.hps_type, 512, self.device,
+                img_path, self.hps_type, 512, self.device,
                 seg_path=os.path.join(self.seg_dir, f'{img_name}.json'))
             data_dict = {
                 'name': img_name,
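With the det attribute gone, process_image() is called with one fewer argument. A minimal sketch of the new call shape — the image path and the 'pymaf' hps_type are illustrative placeholders, not values from this commit:

    import torch
    from lib.pymaf.utils.imutils import process_image

    # seg_path is omitted, so five values come back (no segmentations)
    img_icon, img_hps, img_ori, img_mask, uncrop_param = process_image(
        "examples/sample.png", "pymaf", 512, torch.device("cuda:0"))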
lib/pymaf/utils/imutils.py
CHANGED
@@ -7,6 +7,7 @@ import torch
 import numpy as np
 from PIL import Image
 from rembg.bg import remove
+from torchvision.models import detection
 
 from lib.pymaf.core import constants
 from lib.pymaf.utils.streamer import aug_matrix
@@ -83,7 +84,7 @@ def get_transformer(input_res):
     return [image_to_tensor, mask_to_tensor, image_to_pymaf_tensor, image_to_pixie_tensor, image_to_hybrik_tensor]
 
 
-def process_image(img_file, det, hps_type, input_res=512, device=None, seg_path=None):
+def process_image(img_file, hps_type, input_res=512, device=None, seg_path=None):
     """Read image, do preprocessing and possibly crop it according to the bounding box.
     If there are bounding box annotations, use them to crop the image.
     If no bounding box is specified but openpose detections are available, use them to get the bounding box.
@@ -101,21 +102,20 @@ def process_image(img_file, det, hps_type, input_res=512, device=None, seg_path=
     img_for_crop = cv2.warpAffine(img_ori, M[0:2, :],
                                   (input_res*2, input_res*2), flags=cv2.INTER_CUBIC)
 
-    [… 14 removed lines of the old human_det bbox code, not preserved in this capture …]
-    center = np.array([width // 2, height // 2])
+    # detection for bbox
+    detector = detection.maskrcnn_resnet50_fpn(pretrained=True)
+    detector.eval()
+    predictions = detector(
+        [torch.from_numpy(img_for_crop).permute(2, 0, 1) / 255.])[0]
+    human_ids = torch.logical_and(
+        predictions["labels"] == 1,
+        predictions["scores"] == predictions["scores"].max()).nonzero().squeeze(1)
+    bbox = predictions["boxes"][human_ids, :].flatten().detach().cpu().numpy()
+
+    width = bbox[2] - bbox[0]
+    height = bbox[3] - bbox[1]
+    center = np.array([(bbox[0] + bbox[2]) / 2.0,
+                       (bbox[1] + bbox[3]) / 2.0])
 
     scale = max(height, width) / 180
 
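
For reference, the new detection step can be exercised on its own. Below is a minimal sketch, not the Space's code as committed: it assumes torchvision with the pretrained COCO weights, a uint8 image read with cv2, and "frame.png" as a placeholder path. It also builds the model once per process, whereas the hunk above reconstructs it inside every process_image() call and therefore reloads the weights per image:

    import cv2
    import numpy as np
    import torch
    from torchvision.models import detection

    # Build the detector once and reuse it across images.
    detector = detection.maskrcnn_resnet50_fpn(pretrained=True)
    detector.eval()

    def person_bbox(img: np.ndarray) -> np.ndarray:
        """Return [x0, y0, x1, y1] of the top-scoring person box, as in the diff."""
        tensor = torch.from_numpy(img).permute(2, 0, 1).float() / 255.
        with torch.no_grad():  # the committed hunk omits this; it skips autograd
            pred = detector([tensor])[0]
        # COCO label 1 == "person": keep the highest-scoring detection only
        # if it is a person, mirroring the logical_and in the new code.
        keep = torch.logical_and(pred["labels"] == 1,
                                 pred["scores"] == pred["scores"].max())
        return pred["boxes"][keep].flatten().cpu().numpy()

    bbox = person_bbox(cv2.imread("frame.png"))  # placeholder image
    print(bbox)  # four floats, or empty if the top detection is not a person

One thing worth noting about the committed logic: scores == scores.max() keeps only the single highest-scoring detection, and the logical_and accepts it only when its label is person. If the top-scoring box is some other class, bbox comes back empty and the width/height indexing that follows would raise an IndexError.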