Hi all,
this code snippet is part of the DataVisualizationWriter that will come out with Isaac Sim 2023.1.1 and draws 3d bounding boxes on top of an image:
where:
- `bb_annot_name = "bounding_box_3d_fast"`
- `draw = ImageDraw.Draw(background_img)`, where `background_img` is e.g. the RGB image
- `data` is the dictionary passed to the writer's write function, `def write(self, data: dict)`
def _draw_3d_bounding_boxes(
    self, draw: "ImageDraw.ImageDraw", data: dict, bb_annot_name: str, render_product_name: str, write_params: dict
):
    """Draw the wireframe of every 3D bounding box found in the annotator output.

    Each box is expanded to its 8 corners in local space, transformed
    local -> world -> camera -> clip space, perspective-divided, mapped to
    pixel coordinates and rendered as 12 line segments on ``draw``.

    Edges with an endpoint on or behind the camera plane (clip-space
    ``w <= 0``) or with a non-finite projection are skipped: dividing by such
    a ``w`` yields mirrored or infinite coordinates and would draw spurious
    lines across the image.
    NOTE(review): this assumes clip-space ``w`` is positive for points in
    front of the camera (standard perspective projection; ``w == 1`` for an
    orthographic one) -- confirm against the ``camera_params`` convention.

    Args:
        draw: Object whose ``line(points, fill=..., width=...)`` method is
            called once per visible edge (e.g. ``ImageDraw.Draw(image)``).
        data: Writer payload. Must contain ``data[bb_annot_name]["data"]`` and
            the camera parameters entry described below.
        bb_annot_name: Key of the 3D bounding box annotator in ``data``.
        render_product_name: Render product name; used to build the
            ``camera_params-<name>`` key when the writer has more than one
            render product, otherwise the key is ``camera_params``.
        write_params: Optional line style overrides: ``"fill"`` (default
            ``"green"``) and ``"width"`` (default ``1``).
    """
    # Get the 3d bbox data from the annotator
    annot_data = data[bb_annot_name]["data"]

    # Access the camera parameters (the key is suffixed when there are several render products)
    multiple_render_products = len(self._render_product_names) > 1
    camera_params_annot_name = (
        f"camera_params-{render_product_name}" if multiple_render_products else "camera_params"
    )
    camera_params = data[camera_params_annot_name]

    # Transpose is needed for the row-column-major conversion
    cam_view_transform = camera_params["cameraViewTransform"].reshape((4, 4)).T
    cam_projection_transform = camera_params["cameraProjection"].reshape((4, 4)).T

    # The resolution is used to map the Normalized Device Coordinates (NDC) to screen space
    screen_width, screen_height = camera_params["renderProductResolution"]

    # Get the line draw parameters
    line_color = write_params.get("fill", "green")
    line_width = write_params.get("width", 1)

    # Corner index pairs forming the 12 box edges. Corners are enumerated with
    # x as the slowest and z as the fastest varying axis (see corners_local).
    box_edges = (
        (0, 1), (0, 2), (0, 4),
        (1, 3), (1, 5),
        (2, 3), (2, 6),
        (3, 7),
        (4, 5), (4, 6),
        (5, 7),
        (6, 7),
    )

    # Smallest clip-space w still treated as "in front of the camera"
    min_clip_w = 1e-6

    # Iterate the bounding boxes and draw the edges
    for bbox_data in annot_data:
        # ('semanticId', '<u4'), ('x_min', '<f4'), ('y_min', '<f4'), ('z_min', '<f4'),
        # ('x_max', '<f4'), ('y_max', '<f4'), ('z_max', '<f4'), ('transform', '<f4', (4, 4)),
        # ('occlusionRatio', '<f4')
        x_min, y_min, z_min, x_max, y_max, z_max = (bbox_data[i] for i in range(1, 7))

        # Transformation matrix from local to world coordinate system
        local_to_world_transform = bbox_data[7].T

        # All 8 corners of the box in homogeneous local coordinates, shape (8, 4)
        corners_local = np.array(
            [[x, y, z, 1.0] for x in (x_min, x_max) for y in (y_min, y_max) for z in (z_min, z_max)],
            dtype=float,
        )

        # Transform all corners at once: local -> world -> camera -> clip space
        corners_world = local_to_world_transform @ corners_local.T
        corners_camera = cam_view_transform @ corners_world
        corners_clip = (cam_projection_transform @ corners_camera).T

        # Project to pixel coordinates; None marks a corner that cannot be drawn
        screen_points = []
        for clip_x, clip_y, _, clip_w in corners_clip:
            # `not (w > eps)` also rejects NaN
            if not clip_w > min_clip_w:
                screen_points.append(None)
                continue
            # Perspective divide to NDC, then map NDC to screen space (y axis points down)
            screen_x = float((clip_x / clip_w + 1) * screen_width / 2)
            screen_y = float((1 - clip_y / clip_w) * screen_height / 2)
            if np.isfinite(screen_x) and np.isfinite(screen_y):
                screen_points.append((screen_x, screen_y))
            else:
                screen_points.append(None)

        # Draw the bounding box edges whose endpoints are both drawable
        for start_idx, end_idx in box_edges:
            start_point = screen_points[start_idx]
            end_point = screen_points[end_idx]
            if start_point is None or end_point is None:
                continue
            draw.line([start_point, end_point], fill=line_color, width=line_width)