from pathlib import Path
from PIL import Image
import impact.core as core
import cv2
import numpy as np
from torchvision.transforms.functional import to_pil_image
import torch
orig_torch_load = torch.load

try:
    from ultralytics import YOLO
except Exception as e:
    print(e)
    print(f"\n!!!!!\n\n[ComfyUI-Impact-Subpack] If this error occurs, please check the following link:\n\thttps://github.com/ltdrdata/ComfyUI-Impact-Pack/blob/Main/troubleshooting/TROUBLESHOOTING.md\n\n!!!!!\n")
    raise e

# HOTFIX: https://github.com/ltdrdata/ComfyUI-Impact-Pack/issues/754
# importing YOLO breaks the original torch.load behavior, so restore it
torch.load = orig_torch_load
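
# The hotfix above works because torch.load was snapshotted before the import
# ran. A hedged generalization of that pattern, for illustration only: wrap any
# import known to monkey-patch torch.load so the saved function is restored
# afterwards. `preserve_torch_load` is a hypothetical helper, not part of the
# Impact Pack API.
import contextlib


@contextlib.contextmanager
def preserve_torch_load():
    saved = torch.load  # snapshot the current torch.load
    try:
        yield
    finally:
        torch.load = saved  # undo any monkey-patching done inside the block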

def load_yolo(model_path: str):
    try:
        return YOLO(model_path)
    except ModuleNotFoundError:
        # https://github.com/ultralytics/ultralytics/issues/3856
        # loading a bundled model first pulls in the missing module, after
        # which the requested model can be loaded
        YOLO("yolov8n.pt")
        return YOLO(model_path)
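
# Hypothetical usage sketch for load_yolo; the path below is a placeholder,
# not a file shipped with this pack.
def _example_load_yolo():
    model = load_yolo("models/ultralytics/bbox/face_yolov8m.pt")  # assumed path
    return model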

def inference_bbox(
    model,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
):
    pred = model(image, conf=confidence, device=device)

    bboxes = pred[0].boxes.xyxy.cpu().numpy()
    cv2_image = np.array(image)
    if len(cv2_image.shape) == 3:
        cv2_image = cv2_image[:, :, ::-1].copy()  # Convert RGB to BGR for cv2 processing
    else:
        # Grayscale input: convert to a 3-channel image for consistent processing
        cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_GRAY2BGR)
    cv2_gray = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)

    segms = []
    for x0, y0, x1, y1 in bboxes:
        # each detection becomes a filled rectangle mask over the full image
        cv2_mask = np.zeros(cv2_gray.shape, np.uint8)
        cv2.rectangle(cv2_mask, (int(x0), int(y0)), (int(x1), int(y1)), 255, -1)
        segms.append(cv2_mask.astype(bool))

    n, m = bboxes.shape
    if n == 0:
        return [[], [], [], []]

    results = [[], [], [], []]
    for i in range(len(bboxes)):
        results[0].append(pred[0].names[int(pred[0].boxes[i].cls.item())])
        results[1].append(bboxes[i])
        results[2].append(segms[i])
        results[3].append(pred[0].boxes[i].conf.cpu().numpy())

    return results
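
# Hedged usage sketch for inference_bbox: it returns four parallel lists,
# [labels, xyxy bboxes, boolean rectangle masks, confidences], one entry per
# detection. The model path is a placeholder and the image is synthetic.
def _example_inference_bbox():
    model = load_yolo("models/ultralytics/bbox/face_yolov8m.pt")  # assumed path
    img = Image.new("RGB", (640, 480))
    labels, boxes, masks, confs = inference_bbox(model, img, confidence=0.3)
    for label, box, conf in zip(labels, boxes, confs):
        print(label, box, conf)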

def inference_segm(
    model,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
):
    pred = model(image, conf=confidence, device=device)

    bboxes = pred[0].boxes.xyxy.cpu().numpy()
    n, m = bboxes.shape
    if n == 0:
        return [[], [], [], []]

    # NOTE: pred[0].masks is None when n == 0, so only access it after the guard above
    segms = pred[0].masks.data.cpu().numpy()

    # the model masks may be letterboxed relative to the original image;
    # compute the padding gaps so they can be cropped away before rescaling
    h_segms = segms.shape[1]
    w_segms = segms.shape[2]
    h_orig = image.size[1]
    w_orig = image.size[0]
    ratio_segms = h_segms / w_segms
    ratio_orig = h_orig / w_orig

    if ratio_segms == ratio_orig:
        h_gap = 0
        w_gap = 0
    elif ratio_segms > ratio_orig:
        h_gap = int((ratio_segms - ratio_orig) * h_segms)
        w_gap = 0
    else:
        h_gap = 0
        ratio_segms = w_segms / h_segms
        ratio_orig = w_orig / h_orig
        w_gap = int((ratio_segms - ratio_orig) * w_segms)

    results = [[], [], [], []]
    for i in range(len(bboxes)):
        results[0].append(pred[0].names[int(pred[0].boxes[i].cls.item())])
        results[1].append(bboxes[i])

        mask = torch.from_numpy(segms[i])
        mask = mask[h_gap:mask.shape[0] - h_gap, w_gap:mask.shape[1] - w_gap]
        scaled_mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0),
                                                      size=(image.size[1], image.size[0]),
                                                      mode='bilinear', align_corners=False)
        scaled_mask = scaled_mask.squeeze().squeeze()

        results[2].append(scaled_mask.numpy())
        results[3].append(pred[0].boxes[i].conf.cpu().numpy())

    return results
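
# Minimal sketch of the rescale step above, under the assumption of a 160x160
# model mask and a 768x512 (w x h) original image; the sizes are made up.
def _example_mask_rescale():
    mask = torch.rand(160, 160)
    scaled = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0),
                                             size=(512, 768),  # (h, w) of the original image
                                             mode='bilinear', align_corners=False)
    scaled = scaled.squeeze(0).squeeze(0)
    assert scaled.shape == (512, 768)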

class UltraBBoxDetector:
    bbox_model = None

    def __init__(self, bbox_model):
        self.bbox_model = bbox_model

    def detect(self, image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None):
        drop_size = max(drop_size, 1)
        detected_results = inference_bbox(self.bbox_model, core.tensor2pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)

        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)

        items = []
        h = image.shape[1]
        w = image.shape[2]

        for x, label in zip(segmasks, detected_results[0]):
            item_bbox = x[0]
            item_mask = x[1]

            y1, x1, y2, x2 = item_bbox

            if x2 - x1 > drop_size and y2 - y1 > drop_size:  # minimum dimension must be (2,2) to avoid squeeze issue
                crop_region = core.make_crop_region(w, h, item_bbox, crop_factor)

                if detailer_hook is not None:
                    crop_region = detailer_hook.post_crop_region(w, h, item_bbox, crop_region)

                cropped_image = core.crop_image(image, crop_region)
                cropped_mask = core.crop_ndarray2(item_mask, crop_region)
                confidence = x[2]
                # bbox_size = (item_bbox[2]-item_bbox[0], item_bbox[3]-item_bbox[1])  # (w,h)

                item = core.SEG(cropped_image, cropped_mask, confidence, crop_region, item_bbox, label, None)
                items.append(item)

        shape = image.shape[1], image.shape[2]
        segs = shape, items

        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            segs = detailer_hook.post_detection(segs)

        return segs

    def detect_combined(self, image, threshold, dilation):
        detected_results = inference_bbox(self.bbox_model, core.tensor2pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)
        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)
        return core.combine_masks(segmasks)

    def setAux(self, x):
        pass
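
# Hedged usage sketch for UltraBBoxDetector: ComfyUI images are BHWC float
# tensors in [0, 1], and SEGS come back as ((h, w), [SEG, ...]). The model
# path is a placeholder.
def _example_bbox_detect():
    detector = UltraBBoxDetector(load_yolo("models/ultralytics/bbox/face_yolov8m.pt"))
    image = torch.rand(1, 512, 512, 3)
    segs = detector.detect(image, threshold=0.5, dilation=4, crop_factor=3.0)
    (h, w), items = segs
    print(f"{len(items)} detections on a {w}x{h} image")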

class UltraSegmDetector:
    bbox_model = None

    def __init__(self, bbox_model):
        self.bbox_model = bbox_model

    def detect(self, image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None):
        drop_size = max(drop_size, 1)
        detected_results = inference_segm(self.bbox_model, core.tensor2pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)

        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)

        items = []
        h = image.shape[1]
        w = image.shape[2]

        for x, label in zip(segmasks, detected_results[0]):
            item_bbox = x[0]
            item_mask = x[1]

            y1, x1, y2, x2 = item_bbox

            if x2 - x1 > drop_size and y2 - y1 > drop_size:  # minimum dimension must be (2,2) to avoid squeeze issue
                crop_region = core.make_crop_region(w, h, item_bbox, crop_factor)

                if detailer_hook is not None:
                    crop_region = detailer_hook.post_crop_region(w, h, item_bbox, crop_region)

                cropped_image = core.crop_image(image, crop_region)
                cropped_mask = core.crop_ndarray2(item_mask, crop_region)
                confidence = x[2]
                # bbox_size = (item_bbox[2]-item_bbox[0], item_bbox[3]-item_bbox[1])  # (w,h)

                item = core.SEG(cropped_image, cropped_mask, confidence, crop_region, item_bbox, label, None)
                items.append(item)

        shape = image.shape[1], image.shape[2]
        segs = shape, items

        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            segs = detailer_hook.post_detection(segs)

        return segs

    def detect_combined(self, image, threshold, dilation):
        detected_results = inference_segm(self.bbox_model, core.tensor2pil(image), threshold)
        segmasks = core.create_segmasks(detected_results)
        if dilation > 0:
            segmasks = core.dilate_masks(segmasks, dilation)
        return core.combine_masks(segmasks)

    def setAux(self, x):
        pass
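
# Hedged sketch for UltraSegmDetector.detect_combined, which merges every
# detection mask into a single mask, convenient as one inpaint mask. The
# model path is a placeholder.
def _example_segm_combined():
    detector = UltraSegmDetector(load_yolo("models/ultralytics/segm/person_yolov8m-seg.pt"))
    image = torch.rand(1, 512, 512, 3)
    return detector.detect_combined(image, threshold=0.5, dilation=0)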