Sohaib36's picture
add: adding model helper
e502daa
import numpy as np
import torch
import fusion
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def read_calib(calib_path):
"""
Modify from https://github.com/utiasSTARS/pykitti/blob/d3e1bb81676e831886726cc5ed79ce1f049aef2c/pykitti/utils.py#L68
:param calib_path: Path to a calibration text file.
:return: dict with calibration matrices.
"""
calib_all = {}
with open(calib_path, "r") as f:
for line in f.readlines():
if line == "\n":
break
key, value = line.split(":", 1)
calib_all[key] = np.array([float(x) for x in value.split()])
# reshape matrices
calib_out = {}
# 3x4 projection matrix for left camera
calib_out["P2"] = calib_all["P2"].reshape(3, 4)
calib_out["Tr"] = np.identity(4) # 4x4 matrix
calib_out["Tr"][:3, :4] = calib_all["Tr"].reshape(3, 4)
return calib_out
def vox2pix(cam_E, cam_k,
vox_origin, voxel_size,
img_W, img_H,
scene_size):
"""
compute the 2D projection of voxels centroids
Parameters:
----------
cam_E: 4x4
=camera pose in case of NYUv2 dataset
=Transformation from camera to lidar coordinate in case of SemKITTI
cam_k: 3x3
camera intrinsics
vox_origin: (3,)
world(NYU)/lidar(SemKITTI) cooridnates of the voxel at index (0, 0, 0)
img_W: int
image width
img_H: int
image height
scene_size: (3,)
scene size in meter: (51.2, 51.2, 6.4) for SemKITTI and (4.8, 4.8, 2.88) for NYUv2
Returns
-------
projected_pix: (N, 2)
Projected 2D positions of voxels
fov_mask: (N,)
Voxels mask indice voxels inside image's FOV
pix_z: (N,)
Voxels'distance to the sensor in meter
"""
# Compute the x, y, z bounding of the scene in meter
vol_bnds = np.zeros((3, 2))
vol_bnds[:, 0] = vox_origin
vol_bnds[:, 1] = vox_origin + np.array(scene_size)
# Compute the voxels centroids in lidar cooridnates
vol_dim = np.ceil((vol_bnds[:, 1] - vol_bnds[:, 0]) /
voxel_size).copy(order='C').astype(int)
xv, yv, zv = np.meshgrid(
range(vol_dim[0]),
range(vol_dim[1]),
range(vol_dim[2]),
indexing='ij'
)
vox_coords = np.concatenate([
xv.reshape(1, -1),
yv.reshape(1, -1),
zv.reshape(1, -1)
], axis=0).astype(int).T
# Project voxels'centroid from lidar coordinates to camera coordinates
cam_pts = fusion.TSDFVolume.vox2world(vox_origin, vox_coords, voxel_size)
cam_pts = fusion.rigid_transform(cam_pts, cam_E)
# Project camera coordinates to pixel positions
projected_pix = fusion.TSDFVolume.cam2pix(cam_pts, cam_k)
pix_x, pix_y = projected_pix[:, 0], projected_pix[:, 1]
# Eliminate pixels outside view frustum
pix_z = cam_pts[:, 2]
fov_mask = np.logical_and(pix_x >= 0,
np.logical_and(pix_x < img_W,
np.logical_and(pix_y >= 0,
np.logical_and(pix_y < img_H,
pix_z > 0))))
return torch.from_numpy(projected_pix), torch.from_numpy(fov_mask), torch.from_numpy(pix_z)
def get_grid_coords(dims, resolution):
"""
:param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
:return coords_grid: is the center coords of voxels in the grid
"""
g_xx = np.arange(0, dims[0] + 1)
g_yy = np.arange(0, dims[1] + 1)
sensor_pose = 10
g_zz = np.arange(0, dims[2] + 1)
# Obtaining the grid with coords...
xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1])
coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T
coords_grid = coords_grid.astype(np.float)
coords_grid = (coords_grid * resolution) + resolution / 2
temp = np.copy(coords_grid)
temp[:, 0] = coords_grid[:, 1]
temp[:, 1] = coords_grid[:, 0]
coords_grid = np.copy(temp)
return coords_grid
def get_projections(img_W, img_H):
scale_3ds = [1, 2]
data = {}
for scale_3d in scale_3ds:
scene_size = (4.8, 4.8, 2.88)
vox_origin = np.array([-1.54591799, 0.8907361, -0.05])
voxel_size = 0.08
calib = read_calib("/monoscene/MonoScene/calib.txt")
cam_k = np.array([[518.8579, 0, 320], [0, 518.8579, 240], [0, 0, 1]])
cam_pose = np.asarray([[9.6699458e-01, 4.2662762e-02, 2.5120059e-01, 0.0000000e+00],
[-2.5147417e-01, 1.0867463e-03,
9.6786356e-01, 0.0000000e+00],
[4.1018680e-02, -9.9908894e-01,
1.1779292e-02, 1.1794727e+00],
[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00]])
T_velo_2_cam = np.linalg.inv(cam_pose)
# compute the 3D-2D mapping
projected_pix, fov_mask, pix_z = vox2pix(
T_velo_2_cam,
cam_k,
vox_origin,
voxel_size * scale_3d,
img_W,
img_H,
scene_size,
)
data["projected_pix_{}".format(scale_3d)] = projected_pix
data["pix_z_{}".format(scale_3d)] = pix_z
data["fov_mask_{}".format(scale_3d)] = fov_mask
return data
def majority_pooling(grid, k_size=2):
result = np.zeros(
(grid.shape[0] // k_size, grid.shape[1] //
k_size, grid.shape[2] // k_size)
)
for xx in range(0, int(np.floor(grid.shape[0] / k_size))):
for yy in range(0, int(np.floor(grid.shape[1] / k_size))):
for zz in range(0, int(np.floor(grid.shape[2] / k_size))):
sub_m = grid[
(xx * k_size): (xx * k_size) + k_size,
(yy * k_size): (yy * k_size) + k_size,
(zz * k_size): (zz * k_size) + k_size,
]
unique, counts = np.unique(sub_m, return_counts=True)
if True in ((unique != 0) & (unique != 255)):
# Remove counts with 0 and 255
counts = counts[((unique != 0) & (unique != 255))]
unique = unique[((unique != 0) & (unique != 255))]
else:
if True in (unique == 0):
counts = counts[(unique != 255)]
unique = unique[(unique != 255)]
value = unique[np.argmax(counts)]
result[xx, yy, zz] = value
return result
def get_grid_coords(dims, resolution):
"""
:param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
:return coords_grid: is the center coords of voxels in the grid
"""
g_xx = np.arange(0, dims[0] + 1)
g_yy = np.arange(0, dims[1] + 1)
g_zz = np.arange(0, dims[2] + 1)
# Obtaining the grid with coords...
xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1])
coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T
coords_grid = coords_grid.astype(np.float)
coords_grid = (coords_grid * resolution) + resolution / 2
temp = np.copy(coords_grid)
temp[:, 0] = coords_grid[:, 1]
temp[:, 1] = coords_grid[:, 0]
coords_grid = np.copy(temp)
return coords_grid
def draw(
voxels,
cam_pose,
vox_origin,
voxel_size=0.08,
d=0.75, # 0.75m - determine the size of the mesh representing the camera
):
# Compute the coordinates of the mesh representing camera
y = d * 480 / (2 * 518.8579)
x = d * 640 / (2 * 518.8579)
tri_points = np.array(
[
[0, 0, 0],
[x, y, d],
[-x, y, d],
[-x, -y, d],
[x, -y, d],
]
)
tri_points = np.hstack([tri_points, np.ones((5, 1))])
tri_points = (cam_pose @ tri_points.T).T
x = tri_points[:, 0] - vox_origin[0]
y = tri_points[:, 1] - vox_origin[1]
z = tri_points[:, 2] - vox_origin[2]
triangles = [
(0, 1, 2),
(0, 1, 4),
(0, 3, 4),
(0, 2, 3),
]
# Compute the voxels coordinates
grid_coords = get_grid_coords(
[voxels.shape[0], voxels.shape[2], voxels.shape[1]], voxel_size
)
# Attach the predicted class to every voxel
grid_coords = np.vstack(
(grid_coords.T, np.moveaxis(voxels, [0, 1, 2], [0, 2, 1]).reshape(-1))
).T
# Remove empty and unknown voxels
occupied_voxels = grid_coords[(grid_coords[:, 3] > 0) & (grid_coords[:, 3] < 255)]
colors = np.array(
[
[22, 191, 206, 255],
[214, 38, 40, 255],
[43, 160, 43, 255],
[158, 216, 229, 255],
[114, 158, 206, 255],
[204, 204, 91, 255],
[255, 186, 119, 255],
[147, 102, 188, 255],
[30, 119, 181, 255],
[188, 188, 33, 255],
[255, 127, 12, 255],
[196, 175, 214, 255],
[153, 153, 153, 255],
]
).astype(np.uint8)
pts_colors = [
f'rgb({colors[int(i)][0]}, {colors[int(i)][1]}, {colors[int(i)][2]})' for i in occupied_voxels[:, 3]]
out_fov_colors = [
f'rgb({colors[int(i)][0]//3*2}, {colors[int(i)][1]//3*2}, {colors[int(i)][2]//3*2})' for i in occupied_voxels[:, 3]]
pts_colors = pts_colors + out_fov_colors
fig = go.Figure(data=[go.Scatter3d(x=occupied_voxels[:, 0], y=occupied_voxels[:, 1], z=occupied_voxels[:, 2], mode='markers',
marker=dict(
size=4,
color=pts_colors, # set color to an array/list of desired values
opacity=1.0,
symbol='square'
))])
fig.update_layout(
scene=dict(
aspectmode='data',
yaxis=dict(visible=False, showticklabels=False),
bgcolor="black",
),
)
return fig