# NOTE: removed non-Python extraction artifacts (file-size/commit-hash/line-number header)
import os
import io
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
from iopaint.single_processing import batch_inpaint_cv2
import gradio as gr
from bgremover import process
# set current working directory cache instead of default
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"
def resize_image(input_image_path, width=640, height=640):
    """Load an image from disk and letterbox-resize it to (width, height).

    The image keeps its aspect ratio: it is scaled to fit inside the target
    shape and the remainder is padded with the constant color (114, 114, 114),
    following the YOLO letterbox convention.

    Args:
        input_image_path: Path to the image file on disk.
        width: Target width in pixels (default 640).
        height: Target height in pixels (default 640).

    Returns:
        The letterboxed BGR image as a numpy array of shape (height, width, 3).

    Raises:
        gr.Error: If the file cannot be read or resizing fails.
    """
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising
            raise gr.Error("Could not read the input image file!")
        shape = img.shape[:2]  # current shape [height, width]
        new_shape = (width, height)  # the shape to resize to
        # Scale ratio (new / old): single factor that fits both axes
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # Remaining width/height padding, split evenly over both sides
        color = (114, 114, 114)  # gray padding color used by YOLO letterboxing
        dw = (new_shape[1] - new_unpad[0]) / 2
        dh = (new_shape[0] - new_unpad[1]) / 2
        # +/-0.1 before rounding distributes an odd pixel of padding correctly
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=color)
        return im
    except gr.Error:
        raise  # keep the specific user-facing message raised above
    except Exception as e:
        # Chain the original cause so the real failure shows in the traceback
        raise gr.Error("Error in resizing image!") from e
def process_images(input_image, append_image, default_class="chair"):
    """Replace the first detected `default_class` object in the main image.

    Runs YOLOv8 segmentation on the letterboxed main image, finds the first
    detection whose class name equals `default_class`, inpaints that object
    away, and pastes the object image at the detected bounding box via
    repaitingAndMerge.

    Args:
        input_image: Path to the main image file.
        append_image: Path to the replacement object image file.
        default_class: YOLO class name to search for (default "chair").

    Returns:
        The composited image as a numpy array, for the first match only.

    Raises:
        gr.Error: If an input is missing, decoding fails, or no
            `default_class` object is detected.
    """
    if not input_image:
        raise gr.Error("Please upload a main image.")
    if not append_image:
        raise gr.Error("Please upload an object image.")
    # Letterbox-resize the main image to the model's 640x640 input
    img = resize_image(input_image)
    if img is None:
        raise gr.Error("Failed to decode resized image!")
    H, W, _ = img.shape
    # Fallback paste geometry; overwritten for each contour found below
    x_point = 0
    y_point = 0
    width = 1
    height = 1
    # Load a pretrained YOLOv8m segmentation model
    model = YOLO('pretrained-model/yolov8m-seg.pt') # pretrained YOLOv8m-seg model
    # Run inference; keep detections with confidence >= 0.5
    results = model(img, imgsz=(W,H), conf=0.5) # chair class 56 with confidence >= 0.5
    names = model.names
    class_found = False
    for result in results:
        for i, label in enumerate(result.boxes.cls):
            # Check if this detection's class name matches the target class
            if names[int(label)] == default_class:
                class_found = True
                # Mask tensor -> numpy array with values in {0, 1}
                # NOTE(review): assumes the tensor is already on CPU — confirm
                chair_mask_np = result.masks.data[i].numpy()
                kernel = np.ones((5, 5), np.uint8)  # Create a 5x5 kernel for dilation
                chair_mask_np = cv2.dilate(chair_mask_np, kernel, iterations=2)  # grow the mask slightly
                # Find contours of the dilated mask to get the bounding box
                contours, _ = cv2.findContours((chair_mask_np == 1).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                # NOTE(review): with several contours only the LAST one's box
                # is kept — the loop overwrites the values each iteration
                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)
                    x_point = x
                    y_point = y
                    width = w
                    height = h
                # Scale the binary mask to 0/255 for the inpainting step
                mask = result.masks.data[i].numpy() * 255
                dilated_mask = cv2.dilate(mask, kernel, iterations=2)  # match the dilation above
                # Resize the mask to match the dimensions of the original image
                resized_mask = cv2.resize(dilated_mask, (img.shape[1], img.shape[0]))
                # Inpaint the object away and paste the replacement image
                output_numpy = repaitingAndMerge(append_image,width, height, x_point, y_point, img, resized_mask)
                # Early return: only the first matching detection is processed
                return output_numpy
    # Reached only when no detection matched the requested class
    if not class_found:
        raise gr.Error(f'{default_class} object not found in the image')
def repaitingAndMerge(append_image_path, width, height, xposition, yposition, input_base, mask_base):
    """Inpaint the masked region of the base image, then paste the object image.

    First removes the masked object from `input_base` using LaMa inpainting on
    CPU, then loads, resizes, and background-strips the object image, and pastes
    it at (xposition, yposition) using its alpha channel as the paste mask.

    Args:
        append_image_path: Path to the object image file.
        width: Target width of the pasted object in pixels.
        height: Target height of the pasted object in pixels.
        xposition: X coordinate of the paste position (top-left corner).
        yposition: Y coordinate of the paste position (top-left corner).
        input_base: Base image as a numpy array.
        mask_base: Inpainting mask as a numpy array (0/255).

    Returns:
        The composited image as a numpy array.

    Raises:
        gr.Error: If the object image file cannot be read.
    """
    # lama inpainting: remove the original object from the base image
    print("lama inpainting start")
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")
    # Create a PIL Image from the inpainted result
    final_image = Image.fromarray(inpaint_result_np)
    print("merge start")
    # IMREAD_UNCHANGED keeps an alpha channel when the file has one
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    if append_image is None:
        # cv2.imread returns None (no exception) for missing/corrupt files
        raise gr.Error("Could not read the object image file!")
    # Resize the object to the detected bounding-box size
    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    # Normalize to RGBA; JPEG-style files come back 3-channel (BGR, no alpha)
    if resized_image.ndim == 3 and resized_image.shape[2] == 4:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)
    else:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGBA)
    # Strip the object's background; process() is expected to return a PIL
    # image with transparency — TODO confirm against bgremover
    append_image_pil = process(resized_image)
    # Use the object itself as the paste mask so only opaque pixels are copied
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")
    # Return the composited result as a numpy array
    return np.array(final_image)