Delete utils

Browse files

Files changed (11) hide show

utils/batch_size.py +0 -63
utils/colormap.py +0 -45
utils/common.py +0 -42
utils/dataset_configuration.py +0 -81
utils/de_normalized.py +0 -33
utils/depth2normal.py +0 -186
utils/depth_ensemble.py +0 -115
utils/image_util.py +0 -83
utils/normal_ensemble.py +0 -22
utils/seed_all.py +0 -33
utils/surface_normal.py +0 -213

utils/batch_size.py DELETED Viewed

@@ -1,63 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import torch
-import math
-# Search table for suggested max. inference batch size.
-# One row per tested configuration: (res, total_vram, bs, dtype).
-_BS_TABLE_ROWS = (
-    # tested on A100-PCIE-80GB
-    (768, 79, 35, torch.float32),
-    (1024, 79, 20, torch.float32),
-    # tested on A100-PCIE-40GB
-    (768, 39, 15, torch.float32),
-    (1024, 39, 8, torch.float32),
-    (768, 39, 30, torch.float16),
-    (1024, 39, 15, torch.float16),
-    # tested on RTX3090, RTX4090
-    (512, 23, 20, torch.float32),
-    (768, 23, 7, torch.float32),
-    (1024, 23, 3, torch.float32),
-    (512, 23, 40, torch.float16),
-    (768, 23, 18, torch.float16),
-    (1024, 23, 10, torch.float16),
-    # tested on GTX1080Ti
-    (512, 10, 5, torch.float32),
-    (768, 10, 2, torch.float32),
-    (512, 10, 10, torch.float16),
-    (768, 10, 5, torch.float16),
-    (1024, 10, 3, torch.float16),
-)
-bs_search_table = [
-    {"res": res, "total_vram": vram, "bs": bs, "dtype": dtype}
-    for res, vram, bs, dtype in _BS_TABLE_ROWS
-]
-def find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
-    """
-    Look up a suitable operating batch size for the current GPU.
-    Args:
-        ensemble_size (`int`):
-            Number of predictions to be ensembled.
-        input_res (`int`):
-            Operating resolution of the input image.
-        dtype (`torch.dtype`):
-            Only table rows recorded for this dtype are considered.
-    Returns:
-        `int`: Operating batch size; 1 when CUDA is unavailable or no row matches.
-    """
-    if not torch.cuda.is_available():
-        return 1
-    # total device memory in GiB, comparable with the table's "total_vram" column
-    vram_gib = torch.cuda.mem_get_info()[1] / 1024.0**3
-    candidates = sorted(
-        (row for row in bs_search_table if row["dtype"] == dtype),
-        key=lambda row: (row["res"], -row["total_vram"]),
-    )
-    half_ensemble = math.ceil(ensemble_size / 2)
-    for row in candidates:
-        if input_res > row["res"] or vram_gib < row["total_vram"]:
-            continue
-        suggested = row["bs"]
-        # never exceed the ensemble; if it does not fit in one batch, split it in two halves
-        if suggested > ensemble_size:
-            return ensemble_size
-        if half_ensemble < suggested < ensemble_size:
-            return half_ensemble
-        return suggested
-    return 1

utils/colormap.py DELETED Viewed

@@ -1,45 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import numpy as np
-import cv2
-def kitti_colormap(disparity, maxval=-1):
-    """
-    A utility function to reproduce the KITTI fake colormap.
-    Arguments:
-      - disparity: numpy float32 array of dimension HxW
-      - maxval: maximum disparity value for normalization (if negative, the maximum value in disparity is used)
-    Returns a numpy uint8 array of shape HxWx3; pixels whose disparity is not positive are black.
-    """
-    if maxval < 0:
-        maxval = np.max(disparity)
-    colormap = np.asarray([[0,0,0,114],[0,0,1,185],[1,0,0,114],[1,0,1,174],[0,1,0,114],[0,1,1,185],[1,1,0,114],[1,1,1,0]])
-    weights = np.asarray([8.771929824561404,5.405405405405405,8.771929824561404,5.747126436781609,8.771929824561404,5.405405405405405,8.771929824561404,0])
-    cumsum = np.asarray([0,0.114,0.299,0.413,0.587,0.701,0.8859999999999999,0.9999999999999999])
-    height, width = disparity.shape[0], disparity.shape[1]
-    # normalized disparity clamped to [0, 1], shape HxWx1
-    values = np.expand_dims(np.minimum(np.maximum(disparity / maxval, 0.), 1.), -1)
-    # broadcasting against the 8 bin edges yields the HxWx8 differences
-    offsets = values - cumsum
-    diffs = np.where(offsets > 0, -1000, offsets)
-    # first bin edge not below the value, minus one; -1 wraps to the last colormap row
-    index = np.argmax(diffs, axis=-1) - 1
-    w = 1 - (values[:, :, 0] - cumsum[index]) * weights[index]
-    lower = colormap[index]
-    upper = colormap[index + 1]
-    colored_disp = np.zeros([height, width, 3])
-    # colormap columns 0, 1, 2 are written to output channels 2, 1, 0
-    for out_channel, src_channel in ((2, 0), (1, 1), (0, 2)):
-        colored_disp[:, :, out_channel] = w * lower[:, :, src_channel] + (1. - w) * upper[:, :, src_channel]
-    visible = np.expand_dims((disparity > 0), -1)
-    return (colored_disp * visible * 255).astype(np.uint8)
-def read_16bit_gt(path):
-    """
-    A utility function to read KITTI 16bit gt.
-    Arguments:
-      - path: filepath
-    Returns a numpy float32 array holding the decoded pixel values divided by 256
-    (HxW for a single-channel image).
-    Raises FileNotFoundError if OpenCV cannot read the file.
-    """
-    raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
-    # cv2.imread reports failure by returning None rather than raising
-    if raw is None:
-        raise FileNotFoundError("cannot read image: %s" % path)
-    return raw.astype(np.float32) / 256.

utils/common.py DELETED Viewed

@@ -1,42 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import json
-import yaml
-import logging
-import os
-import numpy as np
-import sys
-def load_loss_scheme(loss_config):
-    """Parse the YAML file at `loss_config` with yaml.safe_load and return the result."""
-    with open(loss_config, 'r') as config_file:
-        return yaml.safe_load(config_file)
-# Root-logger setup executed at import time: DEBUG toggles the level, and a
-# stream handler with a timestamp/file/line format is attached.
-DEBUG = 0
-logger = logging.getLogger()
-logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
-formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s %(message)s')
-strhdlr = logging.StreamHandler()
-strhdlr.setFormatter(formatter)
-logger.addHandler(strhdlr)
-def count_parameters(model):
-    """Return the total number of elements over all parameters of `model` that require gradients."""
-    trainable = 0
-    for param in model.parameters():
-        if param.requires_grad:
-            trainable += param.numel()
-    return trainable
-def check_path(path):
-    """Create directory `path` (including parents) unless something already exists there."""
-    if os.path.exists(path):
-        return
-    os.makedirs(path, exist_ok=True)

utils/dataset_configuration.py DELETED Viewed

@@ -1,81 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import numpy as np
-import sys
-sys.path.append("..")
-from dataloader.mix_loader import MixDataset
-from torch.utils.data import DataLoader
-from dataloader import transforms
-import os
-# Get Dataset Here
-def prepare_dataset(data_dir=None,
-                    batch_size=1,
-                    test_batch=1,
-                    datathread=4,
-                    logger=None):
-    """
-    Build the training DataLoader over a MixDataset.
-    Args:
-        data_dir: forwarded to MixDataset.
-        batch_size: training batch size.
-        test_batch: accepted but not used by this function.
-        datathread: number of loader workers; the 'datathread' environment variable overrides it.
-        logger: optional logger used to report the worker count.
-    Returns:
-        (train_loader, dataset_config_dict) where the dict holds 'num_batches_per_epoch' and 'img_size'.
-    """
-    train_dataset = MixDataset(data_dir=data_dir)
-    img_height, img_width = train_dataset.get_img_size()
-    env_datathread = os.environ.get('datathread')
-    if env_datathread is not None:
-        datathread = int(env_datathread)
-    if logger is not None:
-        logger.info("Use %d processes to load data..." % datathread)
-    train_loader = DataLoader(
-        train_dataset,
-        batch_size=batch_size,
-        shuffle=True,
-        num_workers=datathread,
-        pin_memory=True,
-    )
-    dataset_config_dict = {
-        'num_batches_per_epoch': len(train_loader),
-        'img_size': (img_height, img_width),
-    }
-    return train_loader, dataset_config_dict
-def depth_scale_shift_normalization(depth):
-    """
-    Map each depth map to [-1, 1] using its 2nd and 98th percentiles.
-    The percentiles are taken per batch element over channel 0; values outside
-    them are clipped. Returns a tensor with the shape of `depth`.
-    """
-    batch = depth.shape[0]
-    flat = depth[:, 0, :, :].reshape(batch, -1).cpu().numpy()
-    def batch_percentile(q):
-        # one value per batch element, moved back to depth's dtype/device and
-        # given three trailing singleton axes for broadcasting
-        return torch.from_numpy(np.percentile(a=flat, q=q, axis=1)).to(depth)[..., None, None, None]
-    low = batch_percentile(2)
-    high = batch_percentile(98)
-    unit_range = (depth - low) / (high - low + 1e-5)
-    return torch.clip((unit_range - 0.5) * 2, -1., 1.)
-def resize_max_res_tensor(input_tensor, mode, recom_resolution=768):
-    """
-    Rescale a 3-channel [B, 3, H, W] tensor so that its longer edge equals `recom_resolution`.
-    Args:
-        input_tensor: tensor whose dimension 1 has size 3.
-        mode: 'normal' uses nearest-neighbour interpolation, anything else bilinear;
-            'depth' additionally divides the values by the scale factor.
-        recom_resolution: target length of the longer edge.
-    Returns:
-        The resized tensor.
-    """
-    assert input_tensor.shape[1]==3
-    original_H, original_W = input_tensor.shape[2:]
-    downscale_factor = min(recom_resolution/original_H, recom_resolution/original_W)
-    if mode == 'normal':
-        interp_kwargs = {'mode': 'nearest'}
-    else:
-        interp_kwargs = {'mode': 'bilinear', 'align_corners': False}
-    resized_input_tensor = F.interpolate(input_tensor, scale_factor=downscale_factor, **interp_kwargs)
-    if mode != 'depth':
-        return resized_input_tensor
-    return resized_input_tensor / downscale_factor

utils/de_normalized.py DELETED Viewed

@@ -1,33 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import numpy as np
-from scipy.optimize import least_squares
-import torch
-def align_scale_shift(pred, target, clip_max):
-    """
-    Fit `target ~ scale * pred + shift` over pixels with 0 < target < clip_max.
-    Returns (scale, shift) from a degree-1 np.polyfit, or (1, 0) when at most
-    10 pixels are valid.
-    """
-    valid = (target > 0) & (target < clip_max)
-    if valid.sum() <= 10:
-        return 1, 0
-    scale, shift = np.polyfit(pred[valid], target[valid], deg=1)
-    return scale, shift
-def align_scale(pred: torch.tensor, target: torch.tensor):
-    """
-    Scale `pred` so that its median matches the median of `target`.
-    Only pixels with target > 0 are used; with at most 10 such pixels the scale is 1.
-    Returns (scaled prediction, scale).
-    """
-    valid = target > 0
-    scale = 1
-    if torch.sum(valid) > 10:
-        # 1e-8 guards against a zero median in the denominator
-        scale = torch.median(target[valid]) / (torch.median(pred[valid]) + 1e-8)
-    return pred * scale, scale
-def align_shift(pred: torch.tensor, target: torch.tensor):
-    """
-    Shift `pred` so that its median matches the median of `target`.
-    Only pixels with target > 0 are used; with at most 10 such pixels the shift is 0.
-    Returns (shifted prediction, shift).
-    """
-    mask = target > 0
-    if torch.sum(mask) > 10:
-        # plain difference of medians: a subtraction needs no division guard,
-        # and adding one would bias the shift
-        shift = torch.median(target[mask]) - torch.median(pred[mask])
-    else:
-        shift = 0
-    pred_shift = pred + shift
-    return pred_shift, shift

utils/depth2normal.py DELETED Viewed

@@ -1,186 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import pickle
-import os
-import h5py
-import numpy as np
-import cv2
-import torch
-import torch.nn as nn
-import glob
-def init_image_coor(height, width, device=None):
-    """
-    Build pixel coordinate grids measured from the image centre.
-    Args:
-        height: number of image rows.
-        width: number of image columns.
-        device: torch device for the result. Defaults to CUDA when available
-            (the previously hard-coded behaviour) and to the CPU otherwise.
-    Returns:
-        (u_u0, v_v0): float32 tensors of shape [1, height, width] holding
-        `column - width / 2` and `row - height / 2` for every pixel.
-    """
-    if device is None:
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    cols = torch.arange(width, dtype=torch.float32, device=device)
-    rows = torch.arange(height, dtype=torch.float32, device=device)
-    u_u0 = cols.repeat(height, 1)[None] - width / 2.0
-    v_v0 = rows.repeat(width, 1).T[None] - height / 2.0
-    return u_u0, v_v0
-def depth_to_xyz(depth, focal_length):
-    """
-    Back-project a [b, c, h, w] depth map to 3-D points using centred pixel coordinates.
-    `focal_length[0]` divides the x term and `focal_length[1]` the y term.
-    Returns the points as a [b, h, w, c] tensor (x, y, z concatenated on the channel axis).
-    """
-    _, _, height, width = depth.shape
-    u_u0, v_v0 = init_image_coor(height, width)
-    fx, fy = focal_length[0], focal_length[1]
-    points = torch.cat([u_u0 * depth / fx, v_v0 * depth / fy, depth], 1)
-    return points.permute(0, 2, 3, 1) # [b, h, w, c]
-def get_surface_normal(xyz, patch_size=5):
-    """
-    Estimate per-pixel surface normals by least-squares plane fitting.
-    For every pixel, the points p inside the surrounding patch_size x patch_size
-    window (zero-padded at the border) are fitted to a plane n . p = 1 by solving
-    the regularized normal equations (A^T A + 1e-6 I) n = A^T 1, where the rows
-    of A are the window's points.
-    Args:
-        xyz: point map of shape [1, h, w, 3].
-        patch_size: side length of the square fitting window.
-    Returns:
-        Normalized normals of shape [h, w, 3, 1], flipped so that n . p <= 0.
-        A pixel whose fitted normal is the zero vector yields NaN (0 / 0).
-    """
-    x, y, z = (coord.unsqueeze(0) for coord in torch.unbind(xyz, dim=3))  # each [1, 1, h, w]
-    # all-ones kernel on the input's own device: conv2d sums a quantity over each window
-    patch_weight = torch.ones((1, 1, patch_size, patch_size), dtype=xyz.dtype, device=xyz.device)
-    def window_sum(values):
-        return nn.functional.conv2d(values, weight=patch_weight, padding=int(patch_size / 2))
-    xx_patch, yy_patch, zz_patch = window_sum(x * x), window_sum(y * y), window_sum(z * z)
-    xy_patch, xz_patch, yz_patch = window_sum(x * y), window_sum(x * z), window_sum(y * z)
-    ATA = torch.stack([xx_patch, xy_patch, xz_patch, xy_patch, yy_patch, yz_patch, xz_patch, yz_patch, zz_patch],
-                      dim=4)[0, 0]
-    ATA = torch.reshape(ATA, (ATA.size(0), ATA.size(1), 3, 3))
-    # small ridge term keeps the 3x3 systems solvable
-    ATA = ATA + 1e-6 * torch.eye(3, device=ATA.device, dtype=ATA.dtype)
-    AT1 = torch.stack([window_sum(x), window_sum(y), window_sum(z)], dim=4)[0, 0]
-    AT1 = torch.unsqueeze(AT1, 3)
-    # Every pixel owns an independent 3x3 system, so one batched solve covers the
-    # whole image, including rows/columns the former 4x4 tiling skipped when
-    # h or w was not divisible by 4.
-    n_img = torch.linalg.solve(ATA, AT1)
-    n_img_L2 = torch.sqrt(torch.sum(n_img ** 2, dim=2, keepdim=True))
-    n_img_norm = n_img / n_img_L2
-    # re-orient normals consistently
-    orient_mask = torch.sum(n_img_norm[..., 0] * xyz[0], dim=2) > 0
-    n_img_norm[orient_mask] *= -1
-    return n_img_norm
-def get_surface_normalv2(xyz, patch_size=5):
-    """
-    Estimate surface normals from cross products of neighbouring point differences.
-    xyz: xyz coordinates, unpacked as [b, h, w, c]
-    patch: [p1, p2, p3,
-            p4, p5, p6,
-            p7, p8, p9]
-    Two normals are computed per pixel as (p4 - p6) x (p2 - p8): one from the
-    neighbours at the edge of the padded patch and one from the `_in` neighbours.
-    Each is flipped so that its dot product with xyz is not positive, normalized,
-    and the two are summed and normalized again.
-    NOTE(review): the padding slice `half_patch:-half_patch` only restores the
-    h x w interior when patch_size is odd - confirm callers never pass an even size.
-    return: normal [h, w, 3, b]
-    """
-    b, h, w, c = xyz.shape
-    half_patch = patch_size // 2
-    # zero-padded copy of xyz with half_patch extra rows/columns on every side
-    xyz_pad = torch.zeros((b, h + patch_size - 1, w + patch_size - 1, c), dtype=xyz.dtype, device=xyz.device)
-    xyz_pad[:, half_patch:-half_patch, half_patch:-half_patch, :] = xyz
-    # the diagonal neighbours (p1, p3, p7, p9) are not used
-    xyz_left = xyz_pad[:, half_patch:half_patch + h, :w, :]  # p4: first w padded columns
-    xyz_right = xyz_pad[:, half_patch:half_patch + h, -w:, :]  # p6: last w padded columns
-    xyz_top = xyz_pad[:, :h, half_patch:half_patch + w, :]  # p2: first h padded rows
-    xyz_bottom = xyz_pad[:, -h:, half_patch:half_patch + w, :]  # p8: last h padded rows
-    xyz_horizon = xyz_left - xyz_right  # p4p6
-    xyz_vertical = xyz_top - xyz_bottom  # p2p8
-    # second set of neighbours: left/top start one padded column/row in
-    xyz_left_in = xyz_pad[:, half_patch:half_patch + h, 1:w+1, :]  # p4
-    xyz_right_in = xyz_pad[:, half_patch:half_patch + h, patch_size-1:patch_size-1+w, :]  # p6
-    xyz_top_in = xyz_pad[:, 1:h+1, half_patch:half_patch + w, :]  # p2
-    xyz_bottom_in = xyz_pad[:, patch_size-1:patch_size-1+h, half_patch:half_patch + w, :]  # p8
-    xyz_horizon_in = xyz_left_in - xyz_right_in  # p4p6
-    xyz_vertical_in = xyz_top_in - xyz_bottom_in  # p2p8
-    n_img_1 = torch.cross(xyz_horizon_in, xyz_vertical_in, dim=3)
-    n_img_2 = torch.cross(xyz_horizon, xyz_vertical, dim=3)
-    # re-orient normals consistently: flip those with a positive dot product with xyz
-    orient_mask = torch.sum(n_img_1 * xyz, dim=3) > 0
-    n_img_1[orient_mask] *= -1
-    orient_mask = torch.sum(n_img_2 * xyz, dim=3) > 0
-    n_img_2[orient_mask] *= -1
-    # normalize; 1e-8 avoids division by zero for degenerate cross products
-    n_img1_L2 = torch.sqrt(torch.sum(n_img_1 ** 2, dim=3, keepdim=True))
-    n_img1_norm = n_img_1 / (n_img1_L2 + 1e-8)
-    n_img2_L2 = torch.sqrt(torch.sum(n_img_2 ** 2, dim=3, keepdim=True))
-    n_img2_norm = n_img_2 / (n_img2_L2 + 1e-8)
-    # average 2 norms
-    n_img_aver = n_img1_norm + n_img2_norm
-    n_img_aver_L2 = torch.sqrt(torch.sum(n_img_aver ** 2, dim=3, keepdim=True))
-    n_img_aver_norm = n_img_aver / (n_img_aver_L2 + 1e-8)
-    # re-orient normals consistently
-    orient_mask = torch.sum(n_img_aver_norm * xyz, dim=3) > 0
-    n_img_aver_norm[orient_mask] *= -1
-    n_img_aver_norm_out = n_img_aver_norm.permute((1, 2, 3, 0))  # [h, w, c, b]
-    return n_img_aver_norm_out
-def surface_normal_from_depth(depth, focal_length, valid_mask=None):
-    """
-    Compute a surface-normal map from a depth map.
-    The depth is smoothed with a 3x3 average pool, back-projected with
-    depth_to_xyz, and normals are fitted per batch element with get_surface_normal.
-    Args:
-        depth: depth map, [b, c, h, w].
-        focal_length: 1-D tensor; it is reshaped to [n, 1, 1, 1] before back-projection.
-        valid_mask: optional boolean mask; where it is False the normals are set to 0.
-            It is repeated 3x along dim 1 - presumably [b, 1, h, w], confirm with callers.
-    Returns:
-        Normals as a [b, c, h, w] tensor.
-    """
-    b, _, _, _ = depth.shape
-    focal_length = focal_length[:, None, None, None]
-    depth_filter = nn.functional.avg_pool2d(depth, kernel_size=3, stride=1, padding=1)
-    xyz = depth_to_xyz(depth_filter, focal_length)
-    sn_batch = [get_surface_normal(xyz[i][None]) for i in range(b)]
-    sn_batch = torch.cat(sn_batch, dim=3).permute((3, 2, 0, 1))  # [b, c, h, w]
-    # identity test: comparing a tensor with `!= None` relies on a comparison fallback
-    if valid_mask is not None:
-        mask_invalid = (~valid_mask).repeat(1, 3, 1, 1)
-        sn_batch[mask_invalid] = 0.0
-    return sn_batch

utils/depth_ensemble.py DELETED Viewed

@@ -1,115 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import numpy as np
-import torch
-from scipy.optimize import minimize
-def inter_distances(tensors: torch.Tensor):
-    """
-    Return the differences between every unordered pair of depth maps.
-    Pairs (i, j) with i < j are taken along dimension 0; the result stacks
-    `tensors[i] - tensors[j]` along dimension 0 in that order.
-    """
-    pair_indices = torch.combinations(torch.arange(tensors.shape[0]))
-    differences = [tensors[i : i + 1] - tensors[j : j + 1] for i, j in pair_indices]
-    return torch.concat(differences, dim=0)
-def ensemble_depths(input_images:torch.Tensor,
-                    regularizer_strength: float =0.02,
-                    max_iter: int =2,
-                    tol:float =1e-3,
-                    reduction: str='median',
-                    max_res: int=None):
-    """
-    Ensemble multiple affine-invariant depth images (defined up to scale and shift)
-    by estimating a scale and shift per image that aligns them.
-    Args:
-        input_images: stacked depth predictions; per-image scale/shift are broadcast
-            as [N, 1, 1], so the expected shape is [N, H, W].
-        regularizer_strength: weight of the penalty pulling the ensemble's min/max to 0/1.
-        max_iter: maximum number of BFGS iterations.
-        tol: tolerance passed to scipy.optimize.minimize.
-        reduction: 'mean' or 'median'.
-        max_res: if given and the longer edge exceeds it, the optimization runs on a
-            nearest-neighbour downscaled copy; the result is still full resolution.
-    Returns:
-        (aligned_images, uncertainty): the ensembled map rescaled to [0, 1] and the
-        per-pixel spread (std for 'mean', median absolute deviation for 'median')
-        divided by the same range.
-    Raises:
-        ValueError: if `reduction` is neither 'mean' nor 'median'.
-    """
-    if reduction not in ("mean", "median"):
-        raise ValueError("Unknown reduction method: %s" % reduction)
-    device = input_images.device
-    dtype = input_images.dtype
-    np_dtype = np.float32
-    original_input = input_images.clone()
-    n_img = input_images.shape[0]
-    if max_res is not None:
-        scale_factor = min(max_res / edge for edge in input_images.shape[-2:])
-        if scale_factor < 1:
-            # interpolate expects a channel axis: [N, H, W] -> [N, 1, H, W] -> [N, h, w]
-            input_images = torch.nn.functional.interpolate(
-                input_images.unsqueeze(1), scale_factor=scale_factor, mode="nearest"
-            ).squeeze(1)
-    # init guess: map each image's own min/max to 0/1
-    flat = input_images.reshape((n_img, -1)).cpu().numpy()
-    _min = np.min(flat, axis=1)
-    _max = np.max(flat, axis=1)
-    s_init = 1.0 / (_max - _min).reshape((-1, 1, 1))
-    t_init = (-1 * s_init.flatten() * _min.flatten()).reshape((-1, 1, 1))
-    x = np.concatenate([s_init, t_init]).reshape(-1).astype(np_dtype)
-    input_images = input_images.to(device)
-    def split_params(params):
-        # first half of the vector holds the scales, second half the shifts
-        half = len(params) // 2
-        s = torch.from_numpy(params[:half]).to(dtype=dtype).to(device)
-        t = torch.from_numpy(params[half:]).to(dtype=dtype).to(device)
-        return s.view((-1, 1, 1)), t.view((-1, 1, 1))
-    def reduce(stack):
-        if "mean" == reduction:
-            return torch.mean(stack, dim=0)
-        return torch.median(stack, dim=0).values
-    # objective function
-    def closure(params):
-        s, t = split_params(params)
-        transformed_arrays = input_images * s + t
-        dists = inter_distances(transformed_arrays)
-        sqrt_dist = torch.sqrt(torch.mean(dists**2))
-        pred = reduce(transformed_arrays)
-        near_err = torch.sqrt((0 - torch.min(pred)) ** 2)
-        far_err = torch.sqrt((1 - torch.max(pred)) ** 2)
-        err = sqrt_dist + (near_err + far_err) * regularizer_strength
-        return err.detach().cpu().numpy().astype(np_dtype)
-    res = minimize(
-        closure, x, method="BFGS", tol=tol, options={"maxiter": max_iter, "disp": False}
-    )
-    # Prediction on the full-resolution input
-    s, t = split_params(res.x)
-    transformed_arrays = original_input * s + t
-    aligned_images = reduce(transformed_arrays)
-    if "mean" == reduction:
-        uncertainty = torch.std(transformed_arrays, dim=0)
-    else:
-        # MAD (median absolute deviation) as uncertainty indicator
-        abs_dev = torch.abs(transformed_arrays - aligned_images)
-        uncertainty = torch.median(abs_dev, dim=0).values
-    # Scale and shift to [0, 1]
-    _min = torch.min(aligned_images)
-    _max = torch.max(aligned_images)
-    aligned_images = (aligned_images - _min) / (_max - _min)
-    uncertainty /= _max - _min
-    return aligned_images, uncertainty

utils/image_util.py DELETED Viewed

@@ -1,83 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import matplotlib
-import numpy as np
-import torch
-from PIL import Image
-def resize_max_res(img: Image.Image, max_edge_resolution: int) -> Image.Image:
-    """
-    Resize an image so that its longer edge equals the given length, keeping the aspect ratio.
-    Args:
-        img (`Image.Image`):
-            Image to be resized.
-        max_edge_resolution (`int`):
-            Maximum edge length (pixel).
-    Returns:
-        `Image.Image`: Resized image.
-    """
-    original_width, original_height = img.size
-    downscale_factor = min(
-        max_edge_resolution / original_width, max_edge_resolution / original_height
-    )
-    # int() truncates towards zero
-    target_size = (
-        int(original_width * downscale_factor),
-        int(original_height * downscale_factor),
-    )
-    return img.resize(target_size)
-def colorize_depth_maps(
-    depth_map, min_depth, max_depth, cmap="Spectral", valid_mask=None
-):
-    """
-    Colorize depth maps with a matplotlib colormap.
-    Args:
-        depth_map: torch.Tensor or np.ndarray with at least 2 dimensions.
-        min_depth, max_depth: depth values mapped to the ends of the colormap;
-            values outside are clipped.
-        cmap: name of the matplotlib colormap.
-        valid_mask: optional boolean mask (tensor or array); pixels where it is
-            False are set to 0 in the output.
-    Returns:
-        RGB image(s) of shape [B, 3, H, W] with values in [0, 1]; a float tensor if
-        `depth_map` was a tensor, otherwise an np.ndarray.
-    Raises:
-        TypeError: if `depth_map` is neither a torch.Tensor nor an np.ndarray.
-    """
-    assert len(depth_map.shape) >= 2, "Invalid dimension"
-    is_tensor = isinstance(depth_map, torch.Tensor)
-    if is_tensor:
-        # .cpu() so that tensors living on an accelerator can be converted
-        depth = depth_map.detach().cpu().clone().squeeze().numpy()
-    elif isinstance(depth_map, np.ndarray):
-        depth = depth_map.copy().squeeze()
-    else:
-        raise TypeError("depth_map must be a torch.Tensor or np.ndarray, got %s" % type(depth_map).__name__)
-    # reshape to [ (B,) H, W ]
-    if depth.ndim < 3:
-        depth = depth[np.newaxis, :, :]
-    # colorize
-    cm = matplotlib.colormaps[cmap]
-    depth = ((depth - min_depth) / (max_depth - min_depth)).clip(0, 1)
-    img_colored_np = cm(depth, bytes=False)[:, :, :, 0:3]  # value from 0 to 1
-    img_colored_np = np.rollaxis(img_colored_np, 3, 1)
-    if valid_mask is not None:
-        # convert based on the mask's own type, not on depth_map's
-        if isinstance(valid_mask, torch.Tensor):
-            valid_mask = valid_mask.detach().cpu().numpy()
-        valid_mask = valid_mask.squeeze()  # [H, W] or [B, H, W]
-        if valid_mask.ndim < 3:
-            valid_mask = valid_mask[np.newaxis, np.newaxis, :, :]
-        else:
-            valid_mask = valid_mask[:, np.newaxis, :, :]
-        valid_mask = np.repeat(valid_mask, 3, axis=1)
-        img_colored_np[~valid_mask] = 0
-    if is_tensor:
-        return torch.from_numpy(img_colored_np).float()
-    return img_colored_np
-def chw2hwc(chw):
-    """
-    Move the leading channel axis of a 3-D tensor or array to the end ([C, H, W] -> [H, W, C]).
-    Raises:
-        TypeError: if `chw` is neither a torch.Tensor nor an np.ndarray.
-    """
-    assert 3 == len(chw.shape)
-    if isinstance(chw, torch.Tensor):
-        return torch.permute(chw, (1, 2, 0))
-    if isinstance(chw, np.ndarray):
-        return np.moveaxis(chw, 0, -1)
-    raise TypeError("chw must be a torch.Tensor or np.ndarray, got %s" % type(chw).__name__)

utils/normal_ensemble.py DELETED Viewed

@@ -1,22 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import numpy as np
-import torch
-def ensemble_normals(input_images:torch.Tensor):
-    """
-    Pick the normal prediction closest to the ensemble's mean direction.
-    The [bsz, d, h, w] predictions are normalized along dim 1, their spherical
-    angles are averaged over the batch, and the prediction with the smallest
-    summed angular error to that mean direction is returned (shape [d, h, w]).
-    """
-    bsz, d, h, w = input_images.shape
-    lengths = torch.norm(input_images, p=2, dim=1).unsqueeze(1)
-    normal_preds = input_images / (lengths + 1e-5)
-    nx = normal_preds[:, 0, :, :]
-    ny = normal_preds[:, 1, :, :]
-    nz = normal_preds[:, 2, :, :]
-    # mean azimuth and mean polar angle over the batch
-    phi = torch.atan2(ny, nx).mean(dim=0)
-    theta = torch.atan2(torch.norm(normal_preds[:, :2, :, :], p=2, dim=1), nz).mean(dim=0)
-    sin_theta = torch.sin(theta)
-    normal_pred = torch.zeros((d, h, w)).to(normal_preds)
-    normal_pred[0, :, :] = sin_theta * torch.cos(phi)
-    normal_pred[1, :, :] = sin_theta * torch.sin(phi)
-    normal_pred[2, :, :] = torch.cos(theta)
-    angle_error = torch.acos(torch.cosine_similarity(normal_pred[None], normal_preds, dim=1))
-    best = torch.argmin(angle_error.reshape(bsz, -1).sum(-1))
-    return normal_preds[best]

utils/seed_all.py DELETED Viewed

@@ -1,33 +0,0 @@
-# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# --------------------------------------------------------------------------
-# If you find this code useful, we kindly ask you to cite our paper in your work.
-# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
-# More information about the method can be found at https://marigoldmonodepth.github.io
-# --------------------------------------------------------------------------
-import numpy as np
-import random
-import torch
-def seed_all(seed: int = 0):
-    """
-    Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) with `seed`.
-    """
-    seeders = (
-        random.seed,
-        np.random.seed,
-        torch.manual_seed,
-        torch.cuda.manual_seed_all,
-    )
-    for apply_seed in seeders:
-        apply_seed(seed)

utils/surface_normal.py DELETED Viewed

@@ -1,213 +0,0 @@
-# A reimplemented version in public environments by Xiao Fu and Mu Hu
-import torch
-import numpy as np
-import torch.nn as nn
-def init_image_coor(height, width, device=None):
-    """
-    Build pixel coordinate grids measured from the image centre.
-    Args:
-        height: number of image rows.
-        width: number of image columns.
-        device: torch device for the result. Defaults to CUDA when available
-            (the previously hard-coded behaviour) and to the CPU otherwise.
-    Returns:
-        (u_u0, v_v0): float32 tensors of shape [1, height, width] holding
-        `column - width / 2` and `row - height / 2` for every pixel.
-    """
-    if device is None:
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    cols = torch.arange(width, dtype=torch.float32, device=device)
-    rows = torch.arange(height, dtype=torch.float32, device=device)
-    u_u0 = cols.repeat(height, 1)[None] - width / 2.0
-    v_v0 = rows.repeat(width, 1).T[None] - height / 2.0
-    return u_u0, v_v0
-def depth_to_xyz(depth, focal_length):
-    """
-    Back-project a [b, c, h, w] depth map to 3-D points using centred pixel coordinates.
-    The same `focal_length` divides both the x and the y term.
-    Returns the points as a [b, h, w, c] tensor (x, y, z concatenated on the channel axis).
-    """
-    _, _, height, width = depth.shape
-    u_u0, v_v0 = init_image_coor(height, width)
-    points = torch.cat([u_u0 * depth / focal_length, v_v0 * depth / focal_length, depth], 1)
-    return points.permute(0, 2, 3, 1) # [b, h, w, c]
-def get_surface_normal(xyz, patch_size=3):
-    """
-    Estimate per-pixel surface normals by least-squares plane fitting.
-    For every pixel, the points p inside the surrounding patch_size x patch_size
-    window (zero-padded at the border) are fitted to a plane n . p = 1 by solving
-    the regularized normal equations (A^T A + 1e-6 I) n = A^T 1, where the rows
-    of A are the window's points.
-    Args:
-        xyz: point map of shape [1, h, w, 3].
-        patch_size: side length of the square fitting window.
-    Returns:
-        Normalized normals of shape [h, w, 3, 1], flipped so that n . p <= 0.
-        A pixel whose fitted normal is the zero vector yields NaN (0 / 0).
-    """
-    x, y, z = (coord.unsqueeze(0) for coord in torch.unbind(xyz, dim=3))  # each [1, 1, h, w]
-    # all-ones kernel on the input's own device: conv2d sums a quantity over each window
-    patch_weight = torch.ones((1, 1, patch_size, patch_size), dtype=xyz.dtype, device=xyz.device)
-    def window_sum(values):
-        return nn.functional.conv2d(values, weight=patch_weight, padding=int(patch_size / 2))
-    xx_patch, yy_patch, zz_patch = window_sum(x * x), window_sum(y * y), window_sum(z * z)
-    xy_patch, xz_patch, yz_patch = window_sum(x * y), window_sum(x * z), window_sum(y * z)
-    ATA = torch.stack([xx_patch, xy_patch, xz_patch, xy_patch, yy_patch, yz_patch, xz_patch, yz_patch, zz_patch],
-                      dim=4)[0, 0]
-    ATA = torch.reshape(ATA, (ATA.size(0), ATA.size(1), 3, 3))
-    # small ridge term keeps the 3x3 systems solvable
-    ATA = ATA + 1e-6 * torch.eye(3, device=ATA.device, dtype=ATA.dtype)
-    AT1 = torch.stack([window_sum(x), window_sum(y), window_sum(z)], dim=4)[0, 0]
-    AT1 = torch.unsqueeze(AT1, 3)
-    # torch.solve no longer exists; torch.linalg.solve(A, B) takes the matrix first.
-    # Every pixel owns an independent 3x3 system, so one batched solve covers the
-    # whole image, including rows/columns the former 4x4 tiling skipped when
-    # h or w was not divisible by 4.
-    n_img = torch.linalg.solve(ATA, AT1)
-    n_img_L2 = torch.sqrt(torch.sum(n_img ** 2, dim=2, keepdim=True))
-    n_img_norm = n_img / n_img_L2
-    # re-orient normals consistently
-    orient_mask = torch.sum(n_img_norm[..., 0] * xyz[0], dim=2) > 0
-    n_img_norm[orient_mask] *= -1
-    return n_img_norm
-def get_surface_normalv2(xyz, patch_size=3):
-    """
-    Estimate surface normals from cross products of neighbouring point differences.
-    xyz: xyz coordinates, unpacked as [b, h, w, c]
-    patch: [p1, p2, p3,
-            p4, p5, p6,
-            p7, p8, p9]
-    Two normals are computed per pixel as (p4 - p6) x (p2 - p8): one from the
-    neighbours at the edge of the padded patch and one from the `_in` neighbours.
-    Each is flipped so that its dot product with xyz is not positive, normalized,
-    and the two are summed and normalized again.
-    NOTE(review): the padding slice `half_patch:-half_patch` only restores the
-    h x w interior when patch_size is odd - confirm callers never pass an even size.
-    return: normal [h, w, 3, b]
-    """
-    b, h, w, c = xyz.shape
-    half_patch = patch_size // 2
-    # zero-padded copy of xyz with half_patch extra rows/columns on every side
-    xyz_pad = torch.zeros((b, h + patch_size - 1, w + patch_size - 1, c), dtype=xyz.dtype, device=xyz.device)
-    xyz_pad[:, half_patch:-half_patch, half_patch:-half_patch, :] = xyz
-    # the diagonal neighbours (p1, p3, p7, p9) are not used
-    xyz_left = xyz_pad[:, half_patch:half_patch + h, :w, :]  # p4: first w padded columns
-    xyz_right = xyz_pad[:, half_patch:half_patch + h, -w:, :]  # p6: last w padded columns
-    xyz_top = xyz_pad[:, :h, half_patch:half_patch + w, :]  # p2: first h padded rows
-    xyz_bottom = xyz_pad[:, -h:, half_patch:half_patch + w, :]  # p8: last h padded rows
-    xyz_horizon = xyz_left - xyz_right  # p4p6
-    xyz_vertical = xyz_top - xyz_bottom  # p2p8
-    # second set of neighbours: left/top start one padded column/row in
-    xyz_left_in = xyz_pad[:, half_patch:half_patch + h, 1:w+1, :]  # p4
-    xyz_right_in = xyz_pad[:, half_patch:half_patch + h, patch_size-1:patch_size-1+w, :]  # p6
-    xyz_top_in = xyz_pad[:, 1:h+1, half_patch:half_patch + w, :]  # p2
-    xyz_bottom_in = xyz_pad[:, patch_size-1:patch_size-1+h, half_patch:half_patch + w, :]  # p8
-    xyz_horizon_in = xyz_left_in - xyz_right_in  # p4p6
-    xyz_vertical_in = xyz_top_in - xyz_bottom_in  # p2p8
-    n_img_1 = torch.cross(xyz_horizon_in, xyz_vertical_in, dim=3)
-    n_img_2 = torch.cross(xyz_horizon, xyz_vertical, dim=3)
-    # re-orient normals consistently: flip those with a positive dot product with xyz
-    orient_mask = torch.sum(n_img_1 * xyz, dim=3) > 0
-    n_img_1[orient_mask] *= -1
-    orient_mask = torch.sum(n_img_2 * xyz, dim=3) > 0
-    n_img_2[orient_mask] *= -1
-    # normalize; 1e-8 avoids division by zero for degenerate cross products
-    n_img1_L2 = torch.sqrt(torch.sum(n_img_1 ** 2, dim=3, keepdim=True))
-    n_img1_norm = n_img_1 / (n_img1_L2 + 1e-8)
-    n_img2_L2 = torch.sqrt(torch.sum(n_img_2 ** 2, dim=3, keepdim=True))
-    n_img2_norm = n_img_2 / (n_img2_L2 + 1e-8)
-    # average 2 norms
-    n_img_aver = n_img1_norm + n_img2_norm
-    n_img_aver_L2 = torch.sqrt(torch.sum(n_img_aver ** 2, dim=3, keepdim=True))
-    n_img_aver_norm = n_img_aver / (n_img_aver_L2 + 1e-8)
-    # re-orient normals consistently
-    orient_mask = torch.sum(n_img_aver_norm * xyz, dim=3) > 0
-    n_img_aver_norm[orient_mask] *= -1
-    n_img_aver_norm_out = n_img_aver_norm.permute((1, 2, 3, 0))  # [h, w, c, b]
-    return n_img_aver_norm_out
-def surface_normal_from_depth(depth, focal_length, valid_mask=None):
-    """
-    Compute a surface-normal map from a depth map.
-    The depth is smoothed with two successive 3x3 average pools, back-projected
-    with depth_to_xyz, and normals are computed per batch element with
-    get_surface_normalv2.
-    Args:
-        depth: depth map, [b, c, h, w].
-        focal_length: 1-D tensor; it is reshaped to [n, 1, 1, 1] before back-projection.
-        valid_mask: optional boolean mask; where it is False the normals are set to 0.
-            It is repeated 3x along dim 1 - presumably [b, 1, h, w], confirm with callers.
-    Returns:
-        Normals as a [b, c, h, w] tensor.
-    """
-    b, _, _, _ = depth.shape
-    focal_length = focal_length[:, None, None, None]
-    depth_filter = nn.functional.avg_pool2d(depth, kernel_size=3, stride=1, padding=1)
-    depth_filter = nn.functional.avg_pool2d(depth_filter, kernel_size=3, stride=1, padding=1)
-    xyz = depth_to_xyz(depth_filter, focal_length)
-    sn_batch = [get_surface_normalv2(xyz[i][None]) for i in range(b)]
-    sn_batch = torch.cat(sn_batch, dim=3).permute((3, 2, 0, 1))  # [b, c, h, w]
-    # the mask is optional: inverting the default None would raise a TypeError
-    if valid_mask is not None:
-        mask_invalid = (~valid_mask).repeat(1, 3, 1, 1)
-        sn_batch[mask_invalid] = 0.0
-    return sn_batch
-def vis_normal(normal):
-    """
-    Visualize surface normal. Transfer surface normal value from [-1, 1] to [0, 255]
-    @para normal: surface normal, [h, w, 3], numpy.array
-    Returns a uint8 array of the same shape.
-    """
-    lengths = np.sqrt(np.sum(normal ** 2, axis=2, keepdims=True))
-    unit = normal / (lengths + 1e-8)
-    # scale by 127 around an offset of 128; astype truncates towards zero
-    return (unit * 127 + 128).astype(np.uint8)
-def vis_normal2(normals):
-    '''
-    Montage of normal maps. Vectors are unit length and backfaces thresholded.
-    Channels 0 and 1 are remapped from [-1, 1] to [0, 1], channel 2 is replaced by
-    its absolute value, and vectors with norm below 1e-5 are zeroed.
-    The input array is modified in place and returned.
-    '''
-    # the zero-vector mask must be taken before the channels are overwritten
-    zero = np.sqrt(np.sum(normals**2, axis=2)) < 1e-5
-    normals[:, :, :2] += 1.0  # horizontal and depth channels
-    normals[:, :, :2] *= 0.5
-    normals[:, :, 2] = np.abs(normals[:, :, 2])  # vertical channel
-    normals[zero, :3] = 0.0
-    return normals
-if __name__ == '__main__':
-    import cv2, os