import tempfile
from pathlib import Path

import numpy as np
import onnxruntime as ort
import torch
from PIL import Image

from ..errors import ModelNotFound
from ..log import mklog
from ..utils import (
    get_model_path,
    tensor2pil,
    tiles_infer,
    tiles_merge,
    tiles_split,
)

# Disable MS telemetry
ort.disable_telemetry_events()

log = mklog(__name__)


# - COLOR to NORMALS


def color_to_normals(
    color_img, overlap, progress_callback, *, save_temp=False
):
    """Compute a normal map from the given color map.

    'color_img' must be a numpy array in C,H,W format (with C as RGB).
    'overlap' must be one of 'SMALL', 'MEDIUM', 'LARGE'.
    """
    temp_dir = Path(tempfile.mkdtemp()) if save_temp else None

    # Remove alpha & convert to grayscale
    img = np.mean(color_img[:3], axis=0, keepdims=True)
    if temp_dir:
        Image.fromarray((img[0] * 255).astype(np.uint8)).save(
            temp_dir / "grayscale_img.png"
        )
    log.debug(
        "Converting color image to grayscale by taking "
        f"the mean over color channels: {img.shape}"
    )

    # Split image into tiles
    log.debug("DeepBump Color → Normals : tiling")
    tile_size = 256
    overlaps = {
        "SMALL": tile_size // 6,
        "MEDIUM": tile_size // 4,
        "LARGE": tile_size // 2,
    }
    stride_size = tile_size - overlaps[overlap]
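    # For example, with tile_size=256: 'SMALL' gives a 42 px overlap
    # (stride 214), 'MEDIUM' 64 px (stride 192), 'LARGE' 128 px (stride 128).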
    tiles, paddings = tiles_split(
        img, (tile_size, tile_size), (stride_size, stride_size)
    )
    if temp_dir:
        for i, tile in enumerate(tiles):
            Image.fromarray((tile[0] * 255).astype(np.uint8)).save(
                temp_dir / f"tile_{i}.png"
            )

    # Load model
    log.debug("DeepBump Color → Normals : loading model")
    model = get_model_path("deepbump", "deepbump256.onnx")
    if not model or not model.exists():
        raise ModelNotFound(f"deepbump ({model})")
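    # Preferred execution providers in priority order; only those actually
    # available on this machine are kept below.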
    providers = [
        "TensorrtExecutionProvider",
        "CUDAExecutionProvider",
        "CoreMLExecutionProvider",
        "CPUExecutionProvider",
    ]
    available_providers = [
        provider
        for provider in providers
        if provider in ort.get_available_providers()
    ]
    if not available_providers:
        raise RuntimeError(
            "No valid ONNX Runtime providers available on this machine."
        )
    log.debug(f"Using ONNX providers: {available_providers}")
    ort_session = ort.InferenceSession(
        model.as_posix(), providers=available_providers
    )

    # Predict normal map for each tile
    log.debug("DeepBump Color → Normals : generating")
    pred_tiles = tiles_infer(
        tiles, ort_session, progress_callback=progress_callback
    )
    if temp_dir:
        for i, pred_tile in enumerate(pred_tiles):
            Image.fromarray(
                (pred_tile.transpose(1, 2, 0) * 255).astype(np.uint8)
            ).save(temp_dir / f"pred_tile_{i}.png")

    # Merge tiles
    log.debug("DeepBump Color → Normals : merging")
    pred_img = tiles_merge(
        pred_tiles,
        (stride_size, stride_size),
        (3, img.shape[1], img.shape[2]),
        paddings,
    )
    if temp_dir:
        Image.fromarray(
            (pred_img.transpose(1, 2, 0) * 255).astype(np.uint8)
        ).save(temp_dir / "merged_img.png")

    # Normalize the merged map to [0, 1] (see normalize() below)
    pred_img = normalize(pred_img)
    if temp_dir:
        Image.fromarray(
            (pred_img.transpose(1, 2, 0) * 255).astype(np.uint8)
        ).save(temp_dir / "final_img.png")
        log.debug(f"Debug images saved in {temp_dir}")

    return pred_img


# - NORMALS to CURVATURE


def conv_1d(array, kernel_1d):
    """Perform row-by-row 1D convolutions of the given 2D image
    with the given 1D kernel.
    """
    # Input kernel length must be odd
    k_l = len(kernel_1d)
    assert k_l % 2 != 0

    # Convolution is repeat-padded (wraps around the image borders)
    extended = np.pad(array, k_l // 2, mode="wrap")

    # Output has same size as input (padded, valid-mode convolution)
    output = np.empty(array.shape)
    for i in range(array.shape[0]):
        output[i] = np.convolve(
            extended[i + (k_l // 2)], kernel_1d, mode="valid"
        )
    return output * -1
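

# Note: because np.convolve flips the kernel and the result is negated,
# conv_1d(a, [-1, 0, 1])[i][j] works out to a[i][j + 1] - a[i][j - 1]
# (indices wrapping around), i.e. a central-difference horizontal gradient.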


def gaussian_kernel(length, sigma):
    """Return a 1D gaussian kernel of size 'length'."""
    space = np.linspace(-(length - 1) / 2, (length - 1) / 2, length)
    kernel = np.exp(-0.5 * np.square(space) / np.square(sigma))
    return kernel / np.sum(kernel)
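

# For reference, gaussian_kernel(5, 1) is approximately
# [0.054, 0.244, 0.403, 0.244, 0.054], which sums to 1.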


def normalize(np_array):
    """Normalize all elements of the given numpy array to [0,1]."""
    return (np_array - np.min(np_array)) / (
        np.max(np_array) - np.min(np_array)
    )
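

# Note: normalize() assumes its input is not constant; a constant array
# would divide by zero and yield NaNs.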


def normals_to_curvature(normals_img, blur_radius, progress_callback):
    """Compute a curvature map from the given normal map.

    'normals_img' must be a numpy array in C,H,W format (with C as RGB).
    'blur_radius' must be one of:
    'SMALLEST', 'SMALLER', 'SMALL', 'MEDIUM', 'LARGE', 'LARGER', 'LARGEST'.
    """
    # Convolutions on normal map red & green channels
    if progress_callback is not None:
        progress_callback(0, 4)
    diff_kernel = np.array([-1, 0, 1])
    h_conv = conv_1d(normals_img[0, :, :], diff_kernel)
    if progress_callback is not None:
        progress_callback(1, 4)
    v_conv = conv_1d(-1 * normals_img[1, :, :].T, diff_kernel).T
    if progress_callback is not None:
        progress_callback(2, 4)

    # Sum detected edges
    edges_conv = h_conv + v_conv

    # Blur radius size is proportional to image size
    blur_factors = {
        "SMALLEST": 1 / 256,
        "SMALLER": 1 / 128,
        "SMALL": 1 / 64,
        "MEDIUM": 1 / 32,
        "LARGE": 1 / 16,
        "LARGER": 1 / 8,
        "LARGEST": 1 / 4,
    }
    if blur_radius not in blur_factors:
        raise ValueError(f"{blur_radius} not found in {list(blur_factors)}")
    blur_radius_px = int(
        np.mean(normals_img.shape[1:3]) * blur_factors[blur_radius]
    )

    # If the blur radius is too small, do not blur
    if blur_radius_px < 2:
        edges_conv = normalize(edges_conv)
        return np.stack([edges_conv, edges_conv, edges_conv])

    # Make sure blur kernel length is odd
    if blur_radius_px % 2 == 0:
        blur_radius_px += 1

    # Blur curvature with separated convolutions
    sigma = blur_radius_px // 8
    if sigma == 0:
        sigma = 1
    g_kernel = gaussian_kernel(blur_radius_px, sigma)
    h_blur = conv_1d(edges_conv, g_kernel)
    if progress_callback is not None:
        progress_callback(3, 4)
    v_blur = conv_1d(h_blur.T, g_kernel).T
    if progress_callback is not None:
        progress_callback(4, 4)

    # Normalize to [0,1]
    curvature = normalize(v_blur)

    # Expand the single channel to three channels (RGB)
    return np.stack([curvature, curvature, curvature])


# - NORMALS to HEIGHT


def normals_to_grad(normals_img):
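    # Map the encoded R/G channels from [0, 1] back to signed gradient
    # components in [-1, 1]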
    return (normals_img[0] - 0.5) * 2, (normals_img[1] - 0.5) * 2


def copy_flip(grad_x, grad_y):
    """Concatenate 4 flipped copies of the input gradients (makes them wrap).

    Output is twice as big in both dimensions.
    """
    grad_x_top = np.hstack([grad_x, -np.flip(grad_x, axis=1)])
    grad_x_bottom = np.hstack([np.flip(grad_x, axis=0), -np.flip(grad_x)])
    new_grad_x = np.vstack([grad_x_top, grad_x_bottom])
    grad_y_top = np.hstack([grad_y, np.flip(grad_y, axis=1)])
    grad_y_bottom = np.hstack([-np.flip(grad_y, axis=0), -np.flip(grad_y)])
    new_grad_y = np.vstack([grad_y_top, grad_y_bottom])
    return new_grad_x, new_grad_y
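

# The mirrored layout for grad_x is:
#   [ X           -flip_h(X)  ]
#   [ flip_v(X)   -flip_hv(X) ]
# (sign conventions differ for grad_y). Mirroring makes the gradient field
# periodic, so the FFT-based integration below behaves as if the input wrapped.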


def frankot_chellappa(grad_x, grad_y, progress_callback=None):
    """Frankot-Chellappa depth-from-gradient algorithm."""
    if progress_callback is not None:
        progress_callback(0, 3)
    rows, cols = grad_x.shape

    # Build normalized frequency coordinate grids
    rows_scale = (np.arange(rows) - (rows // 2 + 1)) / (rows - rows % 2)
    cols_scale = (np.arange(cols) - (cols // 2 + 1)) / (cols - cols % 2)
    u_grid, v_grid = np.meshgrid(cols_scale, rows_scale)
    u_grid = np.fft.ifftshift(u_grid)
    v_grid = np.fft.ifftshift(v_grid)
    if progress_callback is not None:
        progress_callback(1, 3)

    grad_x_F = np.fft.fft2(grad_x)
    grad_y_F = np.fft.fft2(grad_y)
    if progress_callback is not None:
        progress_callback(2, 3)
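    # Frankot-Chellappa (1988) least-squares integrability solution in the
    # frequency domain: Z(u, v) = -j * (u * Gx + v * Gy) / (u^2 + v^2)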
    numerator = (-1j * u_grid * grad_x_F) + (-1j * v_grid * grad_y_F)
    denominator = (u_grid**2) + (v_grid**2) + 1e-16
    Z_F = numerator / denominator

    # Zero the DC term: the mean height is arbitrary
    Z_F[0, 0] = 0.0
    Z = np.real(np.fft.ifft2(Z_F))
    if progress_callback is not None:
        progress_callback(3, 3)

    # Rescale the recovered height to [0, 1]
    return (Z - np.min(Z)) / (np.max(Z) - np.min(Z))


def normals_to_height(normals_img, seamless, progress_callback):
    """Compute a height map from the given normal map.

    'normals_img' must be a numpy array in C,H,W format (with C as RGB).
    'seamless' is a bool indicating whether 'normals_img' is seamless.
    """
    # Flip height axis
    flip_img = np.flip(normals_img, axis=1)

    # Get gradients from normal map
    grad_x, grad_y = normals_to_grad(flip_img)
    grad_x = np.flip(grad_x, axis=0)
    grad_y = np.flip(grad_y, axis=0)

    # If non-seamless, expand the gradients with mirrored copies
    if not seamless:
        grad_x, grad_y = copy_flip(grad_x, grad_y)

    # Compute height
    pred_img = frankot_chellappa(
        -grad_x, grad_y, progress_callback=progress_callback
    )

    # Crop to the valid part if the gradients were expanded
    if not seamless:
        height, width = normals_img.shape[1], normals_img.shape[2]
        pred_img = pred_img[:height, :width]

    # Expand the single channel to three channels (RGB)
    return np.stack([pred_img, pred_img, pred_img])


# - ADDON


class MTB_DeepBump:
    """Normal & height map generation from single pictures."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "mode": (
                    [
                        "Color to Normals",
                        "Normals to Curvature",
                        "Normals to Height",
                    ],
                ),
                "color_to_normals_overlap": (["SMALL", "MEDIUM", "LARGE"],),
                "normals_to_curvature_blur_radius": (
                    [
                        "SMALLEST",
                        "SMALLER",
                        "SMALL",
                        "MEDIUM",
                        "LARGE",
                        "LARGER",
                        "LARGEST",
                    ],
                ),
                "normals_to_height_seamless": ("BOOLEAN", {"default": True}),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "apply"
    CATEGORY = "mtb/textures"

    def apply(
        self,
        *,
        image,
        mode="Color to Normals",
        color_to_normals_overlap="SMALL",
        normals_to_curvature_blur_radius="SMALL",
        normals_to_height_seamless=True,
    ):
        images = tensor2pil(image)
        out_images = []

        for image in images:
            log.debug(f"Input image size: {image.size}")
            in_img = np.transpose(image, (2, 0, 1)) / 255
            log.debug(f"Shape transposed for DeepBump: {in_img.shape}")
            out_img = None

            # Apply processing
            if mode == "Color to Normals":
                out_img = color_to_normals(
                    in_img, color_to_normals_overlap, None
                )
            elif mode == "Normals to Curvature":
                out_img = normals_to_curvature(
                    in_img, normals_to_curvature_blur_radius, None
                )
            elif mode == "Normals to Height":
                out_img = normals_to_height(
                    in_img, normals_to_height_seamless, None
                )

            if out_img is not None:
                log.debug(f"Output image shape: {out_img.shape}")
                out_images.append(
                    torch.from_numpy(
                        np.transpose(out_img, (1, 2, 0)).astype(np.float32)
                    ).unsqueeze(0)
                )
            else:
                log.error(
                    f"No output image for mode {mode}, this should not happen"
                )

        for outi in out_images:
            log.debug(f"Shape fed to utils: {outi.shape}")

        return (torch.cat(out_images, dim=0),)


__nodes__ = [MTB_DeepBump]
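

# Usage sketch (hypothetical, outside ComfyUI): the helpers above operate on
# C,H,W float arrays in [0, 1], so a standalone pipeline could look like:
#   normals = color_to_normals(color_img, "MEDIUM", None)
#   curvature = normals_to_curvature(normals, "SMALL", None)
#   height = normals_to_height(normals, seamless=False, progress_callback=None)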