# mask2former/utils/motion_visualizer.py
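"""Visualization utilities for OPD motion annotations and predictions.

Extends detectron2's Visualizer to draw motion axes (as arrows), motion origins,
and rotation circles on top of the usual instance masks, boxes, and labels,
with colors encoding the category and the axis/origin error level.
"""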
from fvcore.common.file_io import PathManager
from detectron2.utils.visualizer import (
Visualizer,
ColorMode,
_create_text_labels,
GenericMask,
)
from detectron2.structures import (
BitMasks,
Boxes,
BoxMode,
Keypoints,
PolygonMasks,
RotatedBoxes,
)
from detectron2.utils.colormap import random_color
from PIL import Image
import numpy as np
from numpy.linalg import norm
import math
import pycocotools.mask as mask_util
MOTION_TYPE = {0: "rotation", 1: "translation"}
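# Category colors, indexed as category_id * 2 + motion_type (rotation = 0, translation = 1)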
_COLORS_CAT = {
0: np.array([166, 206, 227]) / 255,
1: np.array([31, 120, 180]) / 255,
2: np.array([202, 178, 214]) / 255,
3: np.array([106, 61, 154]) / 255,
4: np.array([178, 223, 138]) / 255,
5: np.array([51, 160, 44]) / 255,
}
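# Error-level colors: green (small error), orange (moderate), red (large)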
_COLORS_LEVEL = {
0: np.array([0, 255, 0]) / 255,
1: np.array([255, 128, 0]) / 255,
2: np.array([255, 0, 0]) / 255,
}
def getFocalLength(FOV, height, width=None):
    # FOV is in radians and should be the vertical field-of-view angle
    if width is None:
        f = height / (2 * math.tan(FOV / 2))
        return f
    else:
        fx = height / (2 * math.tan(FOV / 2))
        fy = fx / height * width
        return (fx, fy)
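# Project a 3D point in camera coordinates onto the image plane.
# For synthetic data the intrinsics are fixed (50 degree vertical FOV, 256x256 images);
# for real data the provided intrinsic_matrix is used.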
def camera_to_image(point, is_real=False, intrinsic_matrix=None):
point_camera = np.array(point)
# Calculate the camera intrinsic parameters (they are fixed in this project)
if not is_real:
        # Below are the intrinsics for the MotionNet synthetic dataset
FOV = 50
img_width = img_height = 256
fx, fy = getFocalLength(FOV / 180 * math.pi, img_height, img_width)
cy = img_height / 2
cx = img_width / 2
x = point_camera[0] * fx / (-point_camera[2]) + cx
y = -(point_camera[1] * fy / (-point_camera[2])) + cy
else:
        # Below is for the MotionREAL dataset
point_2d = np.dot(intrinsic_matrix, point_camera[:3])
x = point_2d[0] / point_2d[2]
y = point_2d[1] / point_2d[2]
return (x, y)
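# Rotation matrix that rotates the source direction onto dest (Rodrigues' rotation formula)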
def rotation_from_vectors(source, dest):
    a, b = (source / np.linalg.norm(source)).reshape(3), (
        dest / np.linalg.norm(dest)
    ).reshape(3)
    v = np.cross(a, b)
    c = np.dot(a, b)
    s = np.linalg.norm(v)
    if s == 0:
        # Degenerate case: the vectors are parallel (s == 0); return the identity to avoid
        # dividing by zero
        return np.eye(3)
    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    rmat = np.eye(3) + kmat + np.matmul(kmat, kmat) * ((1 - c) / (s ** 2))
    return rmat
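# Rotate a 2D vector (x, y) by `angle` degrees and scale it; used to build the arrow heads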
def rotatePoint(x, y, angle, scale):
rad = np.pi * angle / 180
x2 = np.cos(rad) * x - np.sin(rad) * y
y2 = np.sin(rad) * x + np.cos(rad) * y
return [x2 * scale, y2 * scale]
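# Sample `num` points on a circle of the given radius lying in a plane perpendicular to `axis`;
# used to draw the rotation circle around a motion axis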
def circlePoints(axis, radius=0.5, num=50):
angles = np.linspace(0, 2 * np.pi, num, endpoint=False)
x_vec = np.cos(angles) * radius
y_vec = np.sin(angles) * radius
z_vec = np.zeros_like(x_vec) + 0.5
points = np.stack((x_vec, y_vec, z_vec), axis=0)
rot = rotation_from_vectors(np.array([0, 0, 1]), np.asarray(axis))
points = np.matmul(rot, points)
return points
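# Intersection-over-union of two boxes in XYWH format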
def get_iou(bb1, bb2):
x_left = max(bb1[0], bb2[0])
y_top = max(bb1[1], bb2[1])
x_right = min(bb1[0] + bb1[2], bb2[0] + bb2[2])
y_bottom = min(bb1[1] + bb1[3], bb2[1] + bb2[3])
if x_right < x_left or y_bottom < y_top:
return 0.0
area = (x_right - x_left) * (y_bottom - y_top)
bb1_area = bb1[2] * bb1[3]
bb2_area = bb2[2] * bb2[3]
iou = area / float(bb1_area + bb2_area - area)
return iou
class MotionVisualizer(Visualizer):
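    """Visualizer that draws motion axes and origins for ground-truth annotations, priors, and predictions."""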
def draw_gt_instance(self, anno, part_id_json, is_real=False, intrinsic_matrix=None, line_length=1):
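        """Draw one ground-truth instance: mask, box, label, the motion axis as an arrow with a
        rotation circle, and, for rotation joints, the motion origin. Also records the part id,
        motion type, and category of the annotation in part_id_json."""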
        # All annotations are already in camera coordinates
masks = [anno["segmentation"]]
boxes = [BoxMode.convert(anno["bbox"], anno["bbox_mode"], BoxMode.XYXY_ABS)]
labels = [anno["category_id"]]
colors = None
if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
"thing_colors"
):
colors = [
self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
for c in labels
]
origins = [anno["motion"]["current_origin"]]
        # Calculate the 2D origin (only a single origin is drawn)
origins_4d = [origin[:] + [1] for origin in origins]
origin_2d = [camera_to_image(origin, is_real, intrinsic_matrix) for origin in origins_4d]
axises = [anno["motion"]["current_axis"]]
new_point = list(np.array(origins[0]) + line_length * np.array(axises[0]))
new_point = new_point[:] + [1]
new_point = camera_to_image(new_point, is_real, intrinsic_matrix)
arrow_p0 = rotatePoint(
new_point[0] - origin_2d[0][0], new_point[1] - origin_2d[0][1], 30, 0.1
)
arrow_p1 = rotatePoint(
new_point[0] - origin_2d[0][0], new_point[1] - origin_2d[0][1], -30, 0.1
)
circle_p = circlePoints(axises[0], 0.1, 50)
circle_p = line_length * circle_p + np.repeat(
np.asarray(origins[0])[:, np.newaxis], 50, axis=1
)
circle_p = circle_p.transpose()
circle_p_2d = np.asarray([camera_to_image(p, is_real, intrinsic_matrix) for p in circle_p])
self.draw_line(
[origin_2d[0][0], new_point[0]],
[origin_2d[0][1], new_point[1]],
color=_COLORS_LEVEL[0],
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p0[0], new_point[0]],
[new_point[1] - arrow_p0[1], new_point[1]],
color=_COLORS_LEVEL[0],
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p1[0], new_point[0]],
[new_point[1] - arrow_p1[1], new_point[1]],
color=_COLORS_LEVEL[0],
linewidth=2,
)
self.draw_polygon(
circle_p_2d, color=_COLORS_LEVEL[0], edge_color=_COLORS_LEVEL[0], alpha=0.0
)
mtype = 0 if anno["motion"]["type"] == "rotation" else 1
if not mtype:
self.draw_circle(origin_2d[0], color=_COLORS_LEVEL[0], radius=5)
names = self.metadata.get("thing_classes", None)
if names:
labels = [names[i] + "_" + anno["motion"]["type"] for i in labels]
labels = [
"{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
for i, a in zip(labels, [anno])
]
cat_id = anno["category_id"]
self.overlay_instances(
labels=labels,
boxes=boxes,
masks=masks,
assigned_colors=[_COLORS_CAT[cat_id * 2 + mtype]],
)
part_id_json["partId"] = anno["motion"]["partId"]
part_id_json["type"] = anno["motion"]["type"]
part_id_json["category_id"] = anno["category_id"]
return self.output
def draw_prior(self, anno):
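        """Draw a motion prior: the axis arrow and rotation circle are projected with the
        annotation's projection matrix, and the color encodes the error stored in anno["error"]."""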
        # All annotations are already in camera coordinates
labels = [0]
origin = anno["start"]
origin_2d = anno["start_2d"]
new_point = anno["end_2d"]
axises = [anno["axises"]]
print(axises)
projection = anno["projMat"]
arrow_p0 = rotatePoint(
new_point[0] - origin_2d[0], new_point[1] - origin_2d[1], 30, 0.1
)
arrow_p1 = rotatePoint(
new_point[0] - origin_2d[0], new_point[1] - origin_2d[1], -30, 0.1
)
circle_p = circlePoints(axises[0], 0.1, 50)
circle_p = circle_p + np.repeat(np.asarray(origin)[:, np.newaxis], 50, axis=1)
# circle_p = circle_p.transpose()
circle_p = np.vstack((circle_p, np.ones(circle_p.shape[1])))
circle_p_2d = np.dot(projection, circle_p)
circle_p_2d = circle_p_2d / circle_p_2d[3, :]
circle_p_2d = circle_p_2d[:2, :]
circle_p_2d[0, :] = (circle_p_2d[0, :] + 1) / 2 * anno["img_size"]
circle_p_2d[1, :] = (-circle_p_2d[1, :] + 1) / 2 * anno["img_size"]
circle_p_2d = circle_p_2d.transpose()
axis_diff = anno["error"]
if axis_diff <= 2:
axis_color = _COLORS_LEVEL[0]
elif axis_diff > 2 and axis_diff <= 10:
axis_color = _COLORS_LEVEL[1]
elif axis_diff > 10:
axis_color = _COLORS_LEVEL[2]
print(axis_diff)
self.draw_line(
[origin_2d[0], new_point[0]],
[origin_2d[1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p0[0], new_point[0]],
[new_point[1] - arrow_p0[1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p1[0], new_point[0]],
[new_point[1] - arrow_p1[1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_polygon(
circle_p_2d, color=axis_color, edge_color=axis_color, alpha=0.0
)
mtype = 1
if not mtype:
self.draw_circle(origin_2d, color=_COLORS_LEVEL[0], radius=5)
cat_id = 0
labels = [
"{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "")
for i, a in zip(labels, [anno])
]
# self.overlay_instances(
# labels=labels, boxes=None, masks=None, assigned_colors=[_COLORS_CAT[cat_id*2+mtype]]
# )
return self.output
def draw_pred_instance(self, prediction, d, match, is_real=False, intrinsic_matrix=None, line_length=1, no_mask=False, diagonal_length=-1):
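        """Draw a predicted instance against its best-matching ground truth (highest box IoU).

        The gt axis is drawn in blue, while the predicted axis and origin are colored by their
        error relative to the gt (green/orange/red). The match dict is filled with the IoU, the
        gt and predicted motion parameters, and the axis/origin errors."""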
if "annotations" in d:
boxes = prediction.get("bbox", None)
anno = None
annos = d["annotations"]
max_iou = -1
if not len(annos):
return None
for gt_anno in annos:
iou = get_iou(gt_anno["bbox"], boxes)
if np.isnan(iou):
return False
if iou > max_iou:
max_iou = iou
anno = gt_anno
else:
max_iou = -1
boxes = prediction.get("bbox", None)
anno = d
boxes = prediction.get("bbox", None)
iou = get_iou(anno["bbox"], boxes)
if iou > max_iou:
max_iou = iou
boxes = [BoxMode.convert(boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)]
        # Based on the motion type, decide whether to visualize the predicted or the gt motion origin;
        # for translation joints the motion origin is meaningless
pred_type = prediction["mtype"]
if pred_type == 1:
pred_origin = anno["motion"]["current_origin"]
else:
pred_origin = prediction["morigin"]
# Prepare the predicted origin and predicted axis
pred_origin_4d = pred_origin + [1]
pred_origin_2d = camera_to_image(pred_origin_4d, is_real, intrinsic_matrix)
pred_axis = np.array(prediction["maxis"])
pred_axis = list(pred_axis / norm(pred_axis))
pred_new_point = list(np.array(pred_origin) + line_length * np.array(pred_axis))
pred_new_point = pred_new_point + [1]
pred_new_point = camera_to_image(pred_new_point, is_real, intrinsic_matrix)
# Prepare the gt origin and gt axis
gt_origin = anno["motion"]["current_origin"]
gt_origin_4d = gt_origin + [1]
gt_origin_2d = camera_to_image(gt_origin_4d, is_real, intrinsic_matrix)
gt_axis = anno["motion"][
"current_axis"
] # gt_axis has been normalized in the annotation
gt_new_point = list(np.array(gt_origin) + line_length * np.array(gt_axis))
gt_new_point = gt_new_point + [1]
gt_new_point = camera_to_image(gt_new_point, is_real, intrinsic_matrix)
        # Calculate the axis and origin errors to determine the colors used to visualize the axis and origin
axis_diff = (
np.arccos(
np.abs(
np.dot(np.array(gt_axis), np.array(pred_axis))
/ (norm(pred_axis) * norm(gt_axis))
)
)
/ np.pi
* 180.0
)
if axis_diff <= 5:
axis_color = _COLORS_LEVEL[0]
elif axis_diff > 5 and axis_diff <= 10:
axis_color = _COLORS_LEVEL[1]
elif axis_diff > 10:
axis_color = _COLORS_LEVEL[2]
if diagonal_length == -1:
raise ValueError("diagonal length error")
origin_diff = np.linalg.norm(
np.cross(np.array(pred_origin) - np.array(gt_origin), np.array(gt_axis))
) / np.linalg.norm(gt_axis) / diagonal_length
if origin_diff <= 0.1:
origin_color = _COLORS_LEVEL[0]
elif origin_diff > 0.1 and origin_diff <= 0.25:
origin_color = _COLORS_LEVEL[1]
elif origin_diff > 0.25:
origin_color = _COLORS_LEVEL[2]
# Visualize gt
gt_color = np.array([0, 0, 255]) / 255
gt_arrow_p0 = rotatePoint(
gt_new_point[0] - gt_origin_2d[0],
gt_new_point[1] - gt_origin_2d[1],
30,
0.1,
)
gt_arrow_p1 = rotatePoint(
gt_new_point[0] - gt_origin_2d[0],
gt_new_point[1] - gt_origin_2d[1],
-30,
0.1,
)
gt_circle_p = circlePoints(gt_axis, 0.1, 50)
gt_circle_p = line_length * gt_circle_p + np.repeat(
np.asarray(gt_origin)[:, np.newaxis], 50, axis=1
)
gt_circle_p = gt_circle_p.transpose()
gt_circle_p_2d = np.asarray([camera_to_image(p, is_real, intrinsic_matrix) for p in gt_circle_p])
self.draw_line(
[gt_origin_2d[0], gt_new_point[0]],
[gt_origin_2d[1], gt_new_point[1]],
color=gt_color,
linewidth=2,
)
self.draw_line(
[gt_new_point[0] - gt_arrow_p0[0], gt_new_point[0]],
[gt_new_point[1] - gt_arrow_p0[1], gt_new_point[1]],
color=gt_color,
linewidth=2,
)
self.draw_line(
[gt_new_point[0] - gt_arrow_p1[0], gt_new_point[0]],
[gt_new_point[1] - gt_arrow_p1[1], gt_new_point[1]],
color=gt_color,
linewidth=2,
)
self.draw_polygon(
gt_circle_p_2d, color=gt_color, edge_color=gt_color, alpha=0.0
)
if pred_type == 0:
# self.draw_text("origin_error: {:.3f}".format(origin_diff), (origin_2d[0][0], origin_2d[0][1]-10*text_y_offset), color="c")
self.draw_circle(gt_origin_2d, color=gt_color, radius=5)
# Visualize the predicted axis
pred_arrow_p0 = rotatePoint(
pred_new_point[0] - pred_origin_2d[0],
pred_new_point[1] - pred_origin_2d[1],
30,
0.1,
)
pred_arrow_p1 = rotatePoint(
pred_new_point[0] - pred_origin_2d[0],
pred_new_point[1] - pred_origin_2d[1],
-30,
0.1,
)
pred_circle_p = circlePoints(pred_axis, 0.1, 50)
pred_circle_p = line_length * pred_circle_p + np.repeat(
np.asarray(pred_origin)[:, np.newaxis], 50, axis=1
)
pred_circle_p = pred_circle_p.transpose()
pred_circle_p_2d = np.asarray([camera_to_image(p, is_real, intrinsic_matrix) for p in pred_circle_p])
# text_y_offset = 1 if (new_point[1]-origin_2d[0][1]) > 0 else -1
# self.draw_text("axis_error: {:.3f}".format(axis_diff), (origin_2d[0][0], origin_2d[0][1]-20*text_y_offset), color="tan")
self.draw_line(
[pred_origin_2d[0], pred_new_point[0]],
[pred_origin_2d[1], pred_new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[pred_new_point[0] - pred_arrow_p0[0], pred_new_point[0]],
[pred_new_point[1] - pred_arrow_p0[1], pred_new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[pred_new_point[0] - pred_arrow_p1[0], pred_new_point[0]],
[pred_new_point[1] - pred_arrow_p1[1], pred_new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_polygon(
pred_circle_p_2d, color=axis_color, edge_color=axis_color, alpha=0.0
)
if pred_type == 0:
# self.draw_text("origin_error: {:.3f}".format(origin_diff), (origin_2d[0][0], origin_2d[0][1]-10*text_y_offset), color="c")
self.draw_circle(pred_origin_2d, color=origin_color, radius=5)
# Assign color to the segmentation
cat_id = prediction.get("category_id", None)
color_cat = _COLORS_CAT[cat_id * 2 + pred_type]
scores = [prediction.get("score", None)]
classes = [prediction.get("category_id", None)]
labels = _create_text_labels_motion(
classes,
scores,
self.metadata.get("thing_classes", None),
MOTION_TYPE[pred_type],
)
keypoints = prediction.get("keypoints", None)
        if prediction.get("segmentation"):
            masks = [prediction.get("segmentation")]
        else:
            masks = None
if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
"thing_colors"
):
colors = [
self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
for c in classes
]
alpha = 0.8
else:
colors = [color_cat]
alpha = 0.5
        if self._instance_mode == ColorMode.IMAGE_BW:
            # mask_util.decode returns an HxW uint8 mask; convert it to a boolean mask
            self.output.img = self._create_grayscale_image(
                mask_util.decode(prediction.get("segmentation")) > 0
            )
            alpha = 0.3
# import pdb
# pdb.set_trace()
match["iou"] = max_iou
# Add the gt information
match["gt"] = {}
match["gt"]["partId"] = anno["motion"]["partId"]
match["gt"]["label"] = anno["motion"]["part_label"]
match["gt"]["type"] = anno["motion"]["type"]
match["gt"]["category_id"] = anno["category_id"]
match["gt"]["origin"] = gt_origin
match["gt"]["axis"] = gt_axis
# add the prediction information
match["pred"] = {}
match["pred"]["score"] = scores[0]
match["pred"]["type"] = pred_type
match["pred"]["category_id"] = cat_id
match["pred"]["origin"] = pred_origin
match["pred"]["axis"] = pred_axis
# add additional information
match["axis_error"] = axis_diff
match["origin_error"] = origin_diff
match["match"] = (
int(pred_type)
== int(
list(MOTION_TYPE.keys())[
list(MOTION_TYPE.values()).index(anno["motion"]["type"])
]
)
) and (cat_id == anno["category_id"])
if no_mask:
masks = None
self.overlay_instances(
masks=masks,
boxes=boxes,
labels=labels,
keypoints=keypoints,
assigned_colors=colors,
alpha=alpha,
)
return self.output
def draw_pred_only(self, prediction, prob):
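        """Draw a prediction without ground truth: skip it if its score is below prob; otherwise
        draw the motion axis, the rotation circle, the origin for rotation joints, and the usual
        instance overlay."""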
        scores = prediction.scores if prediction.has("scores") else None
        if scores is None or scores.numpy()[0] < prob:
            return None
origins = list(prediction.morigin.numpy())
origins = [list(origin) for origin in origins]
axises = list(prediction.maxis.numpy())
axises = [list(axis) for axis in axises]
types = list(prediction.mtype.numpy())
classes = prediction.pred_classes if prediction.has("pred_classes") else None
color_cat = _COLORS_CAT[classes.numpy()[0] * 2 + types[0]]
origins_4d = [origin[:] + [1] for origin in origins]
origin_2d = [camera_to_image(origin) for origin in origins_4d]
new_point = list(np.array(origins[0]) + np.array(axises[0]))
new_point = new_point[:] + [1]
new_point = camera_to_image(new_point)
axis_color = _COLORS_LEVEL[0]
origin_color = _COLORS_LEVEL[0]
arrow_p0 = rotatePoint(
new_point[0] - origin_2d[0][0], new_point[1] - origin_2d[0][1], 30, 0.1
)
arrow_p1 = rotatePoint(
new_point[0] - origin_2d[0][0], new_point[1] - origin_2d[0][1], -30, 0.1
)
circle_p = circlePoints(axises[0], 0.1, 50)
circle_p = circle_p + np.repeat(
np.asarray(origins[0])[:, np.newaxis], 50, axis=1
)
circle_p = circle_p.transpose()
circle_p_2d = np.asarray([camera_to_image(p) for p in circle_p])
# text_y_offset = 1 if (new_point[1]-origin_2d[0][1]) > 0 else -1
# self.draw_text("axis_error: {:.3f}".format(axis_diff), (origin_2d[0][0], origin_2d[0][1]-20*text_y_offset), color="tan")
self.draw_line(
[origin_2d[0][0], new_point[0]],
[origin_2d[0][1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p0[0], new_point[0]],
[new_point[1] - arrow_p0[1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_line(
[new_point[0] - arrow_p1[0], new_point[0]],
[new_point[1] - arrow_p1[1], new_point[1]],
color=axis_color,
linewidth=2,
)
self.draw_polygon(
circle_p_2d, color=axis_color, edge_color=axis_color, alpha=0.0
)
if types[0] == 0:
# self.draw_text("origin_error: {:.3f}".format(origin_diff), (origin_2d[0][0], origin_2d[0][1]-10*text_y_offset), color="c")
self.draw_circle(origin_2d[0], color=origin_color, radius=5)
boxes = prediction.pred_boxes if prediction.has("pred_boxes") else None
labels = _create_text_labels_motion(
classes,
scores,
self.metadata.get("thing_classes", None),
MOTION_TYPE[types[0]],
)
keypoints = (
prediction.pred_keypoints if prediction.has("pred_keypoints") else None
)
if prediction.has("pred_masks"):
masks = np.asarray(prediction.pred_masks)
masks = [
GenericMask(x, self.output.height, self.output.width) for x in masks
]
else:
masks = None
if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
"thing_colors"
):
colors = [
self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
for c in classes
]
alpha = 0.8
else:
colors = [color_cat]
alpha = 0.5
if self._instance_mode == ColorMode.IMAGE_BW:
self.output.img = self._create_grayscale_image(
(prediction.pred_masks.any(dim=0) > 0).numpy()
)
alpha = 0.3
self.overlay_instances(
masks=masks,
boxes=boxes,
labels=labels,
keypoints=keypoints,
assigned_colors=colors,
alpha=alpha,
)
return self.output
def _create_text_labels_motion(classes, scores, class_names, motion_type):
"""
Args:
classes (list[int] or None):
scores (list[float] or None):
class_names (list[str] or None):
Returns:
list[str] or None
"""
labels = None
if classes is not None and class_names is not None and len(class_names) > 1:
labels = [class_names[i] for i in classes]
labels = [label + "_" + motion_type for label in labels]
if scores is not None:
if labels is None:
labels = ["{:.0f}%".format(s * 100) for s in scores]
else:
labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)]
return labels