Problem with getting point cloud in isaac-sim

Please provide all relevant details below before submitting your post. This will help the community provide more accurate and timely assistance. After submitting, you can check the appropriate boxes. Remember, you can always edit your post later to include additional information if needed.

Isaac Sim Version

4.5.0
4.2.0
4.1.0
4.0.0
4.5.0
2023.1.1
2023.1.0-hotfix.1
Other (please specify):

Operating System

Ubuntu 22.04
Ubuntu 20.04
Windows 11
Windows 10
Other (please specify):

GPU Information

  • Model: RTX 3070 8Gb
  • Driver Version: 535.183.01

Topic Description

Detailed Description

I am trying to obtain the point cloud from the depth camera. The camera has an orthographic projection view. As can be seen in the attached pictures, the point cloud obtained through my transformations results in a yz arch in both depth and point cloud. I feel that the transformations are wrong but fail to see how. If anyone has gone through this or maybe, better yet, has an alternative solution, I would be grateful.

Steps to Reproduce

  1. I have attached the task and camera setup, which is quite rudimentary to run on an Issac-sim Python example. I can also upload the main calling file, but I feel it is irrelevant.
  2. The attached pictures can be traced back in the code.

Error Messages

No Errors.

Screenshots or Videos





Additional Information

What I’ve Tried

I have tried using both the distance and the depth image as input for the get_pointcloud() function

Files

point_cloud_utils.py

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation as R
import carb

def save_point_cloud_as_png(point_cloud, filename, projection="xy"):
    """
    Save a 2D projection of the point cloud as a .png image.
    :param point_cloud: Nx3 array of points (x, y, z).
    :param filename: Output file name.
    :param projection: Projection plane ('xy', 'xz', or 'yz').
    """
    # Ensure the point cloud is Nx3
    assert point_cloud.shape[1] == 3, "Point cloud must be Nx3"

    # Select the projection plane
    if projection == "xy":
        x, y = point_cloud[:, 0], point_cloud[:, 1]
        xlabel, ylabel = "X", "Y"
    elif projection == "xz":
        x, y = point_cloud[:, 0], point_cloud[:, 2]
        xlabel, ylabel = "X", "Z"
    elif projection == "yz":
        x, y = point_cloud[:, 1], point_cloud[:, 2]
        xlabel, ylabel = "Y", "Z"
    else:
        raise ValueError("Invalid projection. Use 'xy', 'xz', or 'yz'.")

    # Create the scatter plot
    plt.figure(figsize=(10, 10))
    plt.scatter(
        x, y, s=1, c=point_cloud[:, 2], cmap="viridis"
    )  # Color by Z-axis for depth
    plt.colorbar(label="Depth (Z)")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(f"2D Projection of Point Cloud ({projection.upper()} Plane)")
    plt.grid(True)

    # Save the plot as a .png file
    plt.savefig(filename, dpi=300, bbox_inches="tight")
    plt.close()


def depth_image_from_distance_image(distance, intrinsics):
    """Computes depth image from distance image.

    Background pixels have depth of 0

    Args:
        distance: HxW float array (meters)
        intrinsics: 3x3 float array

    Returns:
        z: HxW float array (meters)

    """
    fx = intrinsics[0][0]
    cx = intrinsics[0][2]
    fy = intrinsics[1][1]
    cy = intrinsics[1][2]

    height, width = distance.shape
    xlin = np.linspace(0, width - 1, width)
    ylin = np.linspace(0, height - 1, height)
    px, py = np.meshgrid(xlin, ylin)

    x_over_z = (px - cx) / fx
    y_over_z = (py - cy) / fy

    # Compute depth
    z = distance / np.sqrt(1.0 + x_over_z**2 + y_over_z**2)

    # Handle background pixels
    # Assuming background pixels in the distance image are represented by a large value (e.g., infinity)
    # get the largest value in the distance image
    background_value = np.max(distance)
    z[distance == background_value] = 0

    return z


def get_pointcloud(color_img, distance_img, cam_intrinsics, is_orthographic=True):
    """Convert RGB-D images to a 3D point cloud.
    
    Args:
        color_img: RGB image (HxWx3)
        distance_img: Distance image (HxW) or depth image
        cam_intrinsics: Camera intrinsics matrix (3x3)
        is_orthographic: Whether the camera uses orthographic projection
        
    Returns:
        points: Nx3 array of 3D points
        colors: Nx3 array of RGB colors
    """
    # Get image dimensions
    im_h = distance_img.shape[0]
    im_w = distance_img.shape[1]

    # Convert distance to depth if needed
    depth_img = depth_image_from_distance_image(distance_img, cam_intrinsics)
    
    # Project depth into 3D point cloud in camera coordinates
    pix_x, pix_y = np.meshgrid(
        np.linspace(0, im_w - 1, im_w), np.linspace(0, im_h - 1, im_h)
    )
    
    if is_orthographic:
        # For orthographic projection, the x and y coordinates are directly proportional
        # to the pixel coordinates, and don't depend on depth
        fx = cam_intrinsics[0, 0]
        fy = cam_intrinsics[1, 1]
        cx = cam_intrinsics[0, 2]
        cy = cam_intrinsics[1, 2]
        
        # Calculate world coordinates (flat plane, no perspective)
        cam_pts_x = (pix_x - cx) / fx  # Scale by focal length but don't multiply by depth
        cam_pts_y = (pix_y - cy) / fy
        cam_pts_z = depth_img.copy()  # Z is just the depth
    else:
        # For perspective projection
        cam_pts_x = np.multiply(
            pix_x - cam_intrinsics[0][2], depth_img / cam_intrinsics[0][0]
        )
        cam_pts_y = np.multiply(
            pix_y - cam_intrinsics[1][2], depth_img / cam_intrinsics[1][1]
        )
        cam_pts_z = depth_img.copy()
    
    # Reshape for output
    cam_pts_x = cam_pts_x.reshape(-1, 1)
    cam_pts_y = cam_pts_y.reshape(-1, 1)
    cam_pts_z = cam_pts_z.reshape(-1, 1)

    # Reshape image into colors for 3D point cloud
    rgb_pts_r = color_img[:, :, 0]
    rgb_pts_g = color_img[:, :, 1]
    rgb_pts_b = color_img[:, :, 2]
    rgb_pts_r = rgb_pts_r.reshape(-1, 1)
    rgb_pts_g = rgb_pts_g.reshape(-1, 1)
    rgb_pts_b = rgb_pts_b.reshape(-1, 1)

    # Combine points and colors
    cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1)
    rgb_pts = np.concatenate((rgb_pts_r, rgb_pts_g, rgb_pts_b), axis=1)
    
    # Filter out points with zero or invalid depth
    valid_depth = (cam_pts_z.reshape(-1) > 0) & np.isfinite(cam_pts_z.reshape(-1))
    cam_pts = cam_pts[valid_depth]
    rgb_pts = rgb_pts[valid_depth]

    # Debug: Save raw point cloud before any transformations
    plt.figure(figsize=(10, 10))
    plt.scatter(cam_pts[:, 0], cam_pts[:, 1], c=cam_pts[:, 2], cmap="viridis", s=1)
    plt.colorbar(label="Depth (Z)")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("Raw Point Cloud (Camera Frame)")
    plt.savefig("raw_point_cloud.png")
    plt.close()
    
    # Debug: 3D visualization of raw point cloud
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    # Sample points to avoid overcrowding
    sample_size = min(5000, cam_pts.shape[0])
    if sample_size > 0:
        sample_indices = np.random.choice(cam_pts.shape[0], sample_size, replace=False)
        ax.scatter(
            cam_pts[sample_indices, 0], 
            cam_pts[sample_indices, 1], 
            cam_pts[sample_indices, 2], 
            c=cam_pts[sample_indices, 2], 
            cmap="viridis",
            s=5
        )
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title('3D Raw Point Cloud (Camera Frame)')
    plt.savefig("3d_raw_point_cloud.png")
    plt.close()

    return cam_pts, rgb_pts


def get_heightmap(
    color_img,
    distance_img,
    cam_intrinsics,
    cam_pose,
    workspace_limits,
    heightmap_resolution,
    is_orthographic=True
):
    """Generate a heightmap from RGB-D images.
    
    Args:
        color_img: RGB image (HxWx3)
        distance_img: Distance image (HxW)
        cam_intrinsics: Camera intrinsics matrix (3x3)
        cam_pose: Camera pose as a 4x4 transformation matrix or a tuple of (position, orientation)
        workspace_limits: Workspace limits as [[x_min, x_max], [y_min, y_max], [z_min, z_max]]
        heightmap_resolution: Resolution of the heightmap in meters
        is_orthographic: Whether the camera uses orthographic projection
        
    Returns:
        color_heightmap: RGB heightmap (HxWx3)
        depth_heightmap: Depth heightmap (HxW)
    """
    # # Convert cam_pose to a 4x4 transformation matrix if it's a tuple
    # if isinstance(cam_pose, tuple):
    #     position, orientation = cam_pose
    #     # Convert quaternion to rotation matrix
    #     rotation_matrix = R.from_quat(orientation).as_matrix()
        
    #     # Create transformation matrix
    #     transform = np.eye(4)
    #     transform[:3, :3] = rotation_matrix
    #     transform[:3, 3] = position
    #     cam_pose = transform
    
    # Ensure cam_pose is a numpy array
    cam_pose = np.array(cam_pose)
    carb.log_warn(f"Camera pose matrix:\n{cam_pose}")
    carb.log_warn(f"Camera intrinsics matrix:\n{cam_intrinsics}")
    
    # Compute heightmap size
    heightmap_size = np.round(
        (
            (workspace_limits[1][1] - workspace_limits[1][0]) / heightmap_resolution,
            (workspace_limits[0][1] - workspace_limits[0][0]) / heightmap_resolution,
        )
    ).astype(int)
    carb.log_warn(f"Workspace limits: {workspace_limits}")
    carb.log_warn(f"Heightmap size: {heightmap_size}")
    
    # Get 3D point cloud from RGB-D images
    surface_pts, color_pts = get_pointcloud(color_img, distance_img, cam_intrinsics, is_orthographic)
    carb.log_warn(f"Point cloud shape before transform: {surface_pts.shape}")
    
    # Transform 3D point cloud from camera coordinates to robot coordinates
    # For orthographic cameras, we need to be careful with the transformation
    if is_orthographic:
        # Create homogeneous coordinates
        surface_pts_homogeneous = np.hstack((surface_pts, np.ones((surface_pts.shape[0], 1))))
        # Apply full transformation matrix
        transformed_pts = np.dot(cam_pose, surface_pts_homogeneous.T).T
        surface_pts = transformed_pts[:, :3]  # Remove homogeneous coordinate
    else:
        # Using the reference implementation approach for perspective cameras
        surface_pts = np.transpose(
            np.dot(cam_pose[0:3, 0:3], np.transpose(surface_pts))
            + np.tile(cam_pose[0:3, 3:], (1, surface_pts.shape[0]))
        )
    
    carb.log_warn(f"Point cloud shape after transform: {surface_pts.shape}")
    
    # Debug visualization of the transformed point cloud
    plt.figure(figsize=(10, 10))
    plt.scatter(surface_pts[:, 0], surface_pts[:, 1], c=surface_pts[:, 2], cmap="viridis", s=1)
    plt.colorbar(label="Height (Z)")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("Transformed Point Cloud (World Frame)")
    plt.savefig("transformed_point_cloud.png")
    plt.close()
    
    # 3D visualization of transformed point cloud
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    # Sample points to avoid overcrowding
    sample_size = min(5000, surface_pts.shape[0])
    if sample_size > 0:
        sample_indices = np.random.choice(surface_pts.shape[0], sample_size, replace=False)
        ax.scatter(
            surface_pts[sample_indices, 0], 
            surface_pts[sample_indices, 1], 
            surface_pts[sample_indices, 2], 
            c=surface_pts[sample_indices, 2], 
            cmap="viridis",
            s=5
        )
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title('3D Transformed Point Cloud (World Frame)')
    plt.savefig("3d_transformed_point_cloud.png")
    plt.close() 
    
    # Print min/max values to check against workspace limits
    x_min, x_max = np.min(surface_pts[:, 0]), np.max(surface_pts[:, 0])
    y_min, y_max = np.min(surface_pts[:, 1]), np.max(surface_pts[:, 1])
    z_min, z_max = np.min(surface_pts[:, 2]), np.max(surface_pts[:, 2])
    carb.log_warn(f"Point cloud X range: {x_min} to {x_max}")
    carb.log_warn(f"Point cloud Y range: {y_min} to {y_max}")
    carb.log_warn(f"Point cloud Z range: {z_min} to {z_max}")
    
    # Filter out surface points outside heightmap boundaries
    heightmap_valid_ind = np.logical_and(
        np.logical_and(
            np.logical_and(
                np.logical_and(
                    surface_pts[:, 0] >= workspace_limits[0][0],
                    surface_pts[:, 0] < workspace_limits[0][1],
                ),
                surface_pts[:, 1] >= workspace_limits[1][0],
            ),
            surface_pts[:, 1] < workspace_limits[1][1],
        ),
        surface_pts[:, 2] < workspace_limits[2][1],
    )
    
    # Check if any valid points exist
    valid_count = np.sum(heightmap_valid_ind)
    carb.log_warn(f"Valid points within workspace: {valid_count} out of {surface_pts.shape[0]}")
    
    if valid_count == 0:
        carb.log_warn("No valid points found within workspace limits")
        # Try with expanded workspace limits for debugging
        expanded_limits = [
            [workspace_limits[0][0] - 0.1, workspace_limits[0][1] + 0.1],
            [workspace_limits[1][0] - 0.1, workspace_limits[1][1] + 0.1],
            [workspace_limits[2][0] - 0.1, workspace_limits[2][1] + 0.1]
        ]
        carb.log_warn(f"Trying with expanded limits: {expanded_limits}")
        
        # Check with expanded limits
        expanded_valid_ind = np.logical_and(
            np.logical_and(
                np.logical_and(
                    np.logical_and(
                        surface_pts[:, 0] >= expanded_limits[0][0],
                        surface_pts[:, 0] < expanded_limits[0][1],
                    ),
                    surface_pts[:, 1] >= expanded_limits[1][0],
                ),
                surface_pts[:, 1] < expanded_limits[1][1],
            ),
            surface_pts[:, 2] < expanded_limits[2][1],
        )
        expanded_valid_count = np.sum(expanded_valid_ind)
        carb.log_warn(f"Valid points with expanded limits: {expanded_valid_count}")
        
        if expanded_valid_count > 0:
            carb.log_warn("Points found with expanded limits - consider adjusting your workspace limits")
        
        return np.zeros((heightmap_size[0], heightmap_size[1], 3), dtype=np.uint8), np.zeros(heightmap_size)
    
    surface_pts = surface_pts[heightmap_valid_ind]
    color_pts = color_pts[heightmap_valid_ind]
    
    # Debug visualization of the filtered point cloud
    plt.figure(figsize=(10, 10))
    plt.scatter(surface_pts[:, 0], surface_pts[:, 1], c=surface_pts[:, 2], cmap="viridis", s=1)
    plt.colorbar(label="Height (Z)")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("Filtered Point Cloud (Within Workspace)")
    plt.savefig("filtered_point_cloud.png")
    plt.close()
    
    # Create orthographic top-down-view RGB-D heightmaps
    color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    depth_heightmap = np.zeros(heightmap_size)
    
    # Convert 3D points to pixel coordinates in the heightmap
    heightmap_pix_x = np.floor(
        (surface_pts[:, 0] - workspace_limits[0][0]) / heightmap_resolution
    ).astype(int)
    heightmap_pix_y = np.floor(
        (surface_pts[:, 1] - workspace_limits[1][0]) / heightmap_resolution
    ).astype(int)
    
    # Clip pixel coordinates to valid range
    heightmap_pix_x = np.clip(heightmap_pix_x, 0, heightmap_size[1] - 1)
    heightmap_pix_y = np.clip(heightmap_pix_y, 0, heightmap_size[0] - 1)
    
    # Populate the heightmap
    color_heightmap_r[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [0]]
    color_heightmap_g[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [1]]
    color_heightmap_b[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [2]]
    
    # For the depth heightmap, we want the highest point (max z) for each pixel
    depth_heightmap[heightmap_pix_y, heightmap_pix_x] = surface_pts[:, 2]
    
    # Combine color channels
    color_heightmap = np.concatenate(
        (color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2
    )
    
    # Normalize depth heightmap
    z_bottom = workspace_limits[2][0]
    depth_heightmap = depth_heightmap - z_bottom
    depth_heightmap[depth_heightmap < 0] = 0
    depth_heightmap[depth_heightmap == -z_bottom] = np.nan
    
    # Save heightmaps for debugging
    plt.figure(figsize=(10, 10))
    plt.imshow(color_heightmap)
    plt.title("Color Heightmap")
    plt.savefig("color_heightmap.png")
    plt.close()
    
    plt.figure(figsize=(10, 10))
    plt.imshow(depth_heightmap, cmap="viridis")
    plt.colorbar(label="Height")
    plt.title("Depth Heightmap")
    plt.savefig("depth_heightmap.png")
    plt.close()
    
    return color_heightmap, depth_heightmap


def create_point_cloud_with_open3d(color_img, depth_img, cam_intrinsics, transform=None):
    """Create a point cloud using Open3D from RGB and depth images.
    
    Args:
        color_img: RGB image (HxWx3) as numpy array
        depth_img: Depth image (HxW) as numpy array in meters
        cam_intrinsics: Camera intrinsics matrix (3x3)
        transform: Optional 4x4 transformation matrix to apply to the point cloud
        
    Returns:
        points: Nx3 array of 3D points
        colors: Nx3 array of RGB colors
    """
    try:
        import open3d as o3d
    except ImportError:
        carb.log_error("Open3D is not installed. Please install it with 'pip install open3d'")
        return None, None
    
    # Convert images to Open3D format
    color_o3d = o3d.geometry.Image(color_img.astype(np.uint8))
    
    # Make sure depth is in the right format (float32 in meters)
    if depth_img.dtype != np.float32:
        depth_img = depth_img.astype(np.float32)
    
    # Create Open3D depth image
    depth_o3d = o3d.geometry.Image(depth_img)
    
    # Create RGBD image
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color_o3d, depth_o3d, 
        depth_scale=1.0,  # depth is already in meters
        depth_trunc=10.0,  # max depth in meters
        convert_rgb_to_intensity=False
    )
    
    # Create intrinsic parameters
    fx, fy = cam_intrinsics[0, 0], cam_intrinsics[1, 1]
    cx, cy = cam_intrinsics[0, 2], cam_intrinsics[1, 2]
    width, height = depth_img.shape[1], depth_img.shape[0]
    
    intrinsic = o3d.camera.PinholeCameraIntrinsic(
        width=width, height=height,
        fx=fx, fy=fy, cx=cx, cy=cy
    )
    
    # Create point cloud from RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image, intrinsic
    )
    
    # Apply transformation if provided
    if transform is not None:
        pcd.transform(transform)
    else:
        # Default transformation to flip the point cloud (assuming camera looks down along z-axis)
        pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    
    # Extract points and colors
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    
    # Save visualization
    vis_filename = "open3d_point_cloud.png"
    carb.log_warn(f"Saving point cloud visualization to {vis_filename}")
    
    # Visualize and save
    try:
        # Create a visualization object
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)  # Create an off-screen window
        vis.add_geometry(pcd)
        
        # Set view control
        ctr = vis.get_view_control()
        ctr.set_zoom(0.5)
        
        # Capture image
        vis.poll_events()
        vis.update_renderer()
        img = vis.capture_screen_float_buffer(True)
        plt.figure(figsize=(10, 10))
        plt.imshow(np.asarray(img))
        plt.axis('off')
        plt.savefig(vis_filename, bbox_inches='tight')
        plt.close()
        
        # Close visualizer
        vis.destroy_window()
    except Exception as e:
        carb.log_warn(f"Failed to visualize point cloud: {e}")
    
    return points, colors


def get_heightmap_with_open3d(
    color_img,
    distance_img,
    cam_intrinsics,
    cam_pose,
    workspace_limits,
    heightmap_resolution
):
    """Generate a heightmap using Open3D for point cloud creation.
    
    Args:
        color_img: RGB image (HxWx3)
        distance_img: Distance image (HxW)
        cam_intrinsics: Camera intrinsics matrix (3x3)
        cam_pose: Camera pose as a 4x4 transformation matrix or a tuple of (position, orientation)
        workspace_limits: Workspace limits as [[x_min, x_max], [y_min, y_max], [z_min, z_max]]
        heightmap_resolution: Resolution of the heightmap in meters
        
    Returns:
        color_heightmap: RGB heightmap (HxWx3)
        depth_heightmap: Depth heightmap (HxW)
    """
    # Convert cam_pose to a 4x4 transformation matrix if it's a tuple
    if isinstance(cam_pose, tuple):
        position, orientation = cam_pose
        # Convert quaternion to rotation matrix
        rotation_matrix = R.from_quat(orientation).as_matrix()
        
        # Create transformation matrix
        transform = np.eye(4)
        transform[:3, :3] = rotation_matrix
        transform[:3, 3] = position
        cam_pose = transform
    
    # Ensure cam_pose is a numpy array
    cam_pose = np.array(cam_pose)
    carb.log_warn(f"Camera pose matrix:\n{cam_pose}")
    
    # Convert distance image to depth image
    depth_img = depth_image_from_distance_image(distance_img, cam_intrinsics)
    
    # Create point cloud using Open3D
    surface_pts, color_pts = create_point_cloud_with_open3d(
        color_img, depth_img, cam_intrinsics, transform=cam_pose
    )
    
    if surface_pts is None:
        carb.log_error("Failed to create point cloud with Open3D")
        return None, None
    
    carb.log_warn(f"Point cloud shape: {surface_pts.shape}")
    
    # Compute heightmap size
    heightmap_size = np.round(
        (
            (workspace_limits[1][1] - workspace_limits[1][0]) / heightmap_resolution,
            (workspace_limits[0][1] - workspace_limits[0][0]) / heightmap_resolution,
        )
    ).astype(int)
    
    # Print min/max values to check against workspace limits
    x_min, x_max = np.min(surface_pts[:, 0]), np.max(surface_pts[:, 0])
    y_min, y_max = np.min(surface_pts[:, 1]), np.max(surface_pts[:, 1])
    z_min, z_max = np.min(surface_pts[:, 2]), np.max(surface_pts[:, 2])
    carb.log_warn(f"Point cloud X range: {x_min} to {x_max}")
    carb.log_warn(f"Point cloud Y range: {y_min} to {y_max}")
    carb.log_warn(f"Point cloud Z range: {z_min} to {z_max}")
    
    # Sort surface points by z value
    sort_z_ind = np.argsort(surface_pts[:, 2])
    surface_pts = surface_pts[sort_z_ind]
    color_pts = color_pts[sort_z_ind]
    
    # Filter out surface points outside heightmap boundaries
    heightmap_valid_ind = np.logical_and(
        np.logical_and(
            np.logical_and(
                np.logical_and(
                    surface_pts[:, 0] >= workspace_limits[0][0],
                    surface_pts[:, 0] < workspace_limits[0][1],
                ),
                surface_pts[:, 1] >= workspace_limits[1][0],
            ),
            surface_pts[:, 1] < workspace_limits[1][1],
        ),
        surface_pts[:, 2] < workspace_limits[2][1],
    )
    
    # Check if any valid points exist
    valid_count = np.sum(heightmap_valid_ind)
    carb.log_warn(f"Valid points within workspace: {valid_count} out of {surface_pts.shape[0]}")
    
    if valid_count == 0:
        carb.log_warn("No valid points found within workspace limits")
        return np.zeros((heightmap_size[0], heightmap_size[1], 3), dtype=np.uint8), np.zeros(heightmap_size)
    
    surface_pts = surface_pts[heightmap_valid_ind]
    color_pts = color_pts[heightmap_valid_ind]
    
    # Create orthographic top-down-view RGB-D heightmaps
    color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    depth_heightmap = np.zeros(heightmap_size)
    
    # Convert 3D points to pixel coordinates in the heightmap
    heightmap_pix_x = np.floor(
        (surface_pts[:, 0] - workspace_limits[0][0]) / heightmap_resolution
    ).astype(int)
    heightmap_pix_y = np.floor(
        (surface_pts[:, 1] - workspace_limits[1][0]) / heightmap_resolution
    ).astype(int)
    
    # Clip pixel coordinates to valid range
    heightmap_pix_x = np.clip(heightmap_pix_x, 0, heightmap_size[1] - 1)
    heightmap_pix_y = np.clip(heightmap_pix_y, 0, heightmap_size[0] - 1)
    
    # Populate the heightmap
    # Convert color values from [0,1] to [0,255] if needed
    if color_pts.max() <= 1.0:
        color_pts = (color_pts * 255).astype(np.uint8)
    
    color_heightmap_r[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [0]]
    color_heightmap_g[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [1]]
    color_heightmap_b[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [2]]
    
    # For the depth heightmap, we want the highest point (max z) for each pixel
    depth_heightmap[heightmap_pix_y, heightmap_pix_x] = surface_pts[:, 2]
    
    # Combine color channels
    color_heightmap = np.concatenate(
        (color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2
    )
    
    # Normalize depth heightmap
    z_bottom = workspace_limits[2][0]
    depth_heightmap = depth_heightmap - z_bottom
    depth_heightmap[depth_heightmap < 0] = 0
    depth_heightmap[depth_heightmap == -z_bottom] = np.nan
    
    # Save heightmaps for debugging
    plt.figure(figsize=(10, 10))
    plt.imshow(color_heightmap)
    plt.title("Color Heightmap (Open3D)")
    plt.savefig("color_heightmap_open3d.png")
    plt.close()
    
    plt.figure(figsize=(10, 10))
    plt.imshow(depth_heightmap, cmap="viridis")
    plt.colorbar(label="Height")
    plt.title("Depth Heightmap (Open3D)")
    plt.savefig("depth_heightmap_open3d.png")
    plt.close()
    
    return color_heightmap, depth_heightmap

pick_and_place_task.py

Note: This is not the full code.


    def set_camera(self):
        # Position camera 1m above the front platform, facing downward
        overhead_position = np.array([0.5, 0, 0.5])  # Centered above the platform
        overhead_orientation = np.array(
            [0.5, -0.5, 0.5, 0.5]
        )  # Facing downward (90 degrees around X-axis)
        perspective_position = np.array([1.0, 0, 0.5])  # Centered above the platform
        perspective_orientation = euler_angles_to_quat(
            np.array([np.pi * 2, np.pi / 2, np.pi])
        )

        # Create the camera
        self.camera = Camera(
            prim_path="/World/OverheadCamera",  # Unique prim path
            frequency=20,
            resolution=(540, 540),
            position=overhead_position,
            orientation=overhead_orientation,
        )
        self.camera.initialize()
        APERTURE_SIZE = 0.5
        self.camera.set_projection_mode("orthographic")  # Set orthographic projection
        self.camera.set_focal_length(1.93)
        self.camera.set_focus_distance(4)
        self.camera.set_horizontal_aperture(APERTURE_SIZE)  # Set horizontal aperture
        self.camera.set_vertical_aperture(
            APERTURE_SIZE
        )  # Set vertical aperture to the same value
        self.camera.set_clipping_range(0.01, 10000)

        # Add necessary frame processing

        self.camera.add_distance_to_camera_to_frame()
        self.camera.add_instance_segmentation_to_frame()
        self.camera.add_instance_id_segmentation_to_frame()
        self.camera.add_semantic_segmentation_to_frame()
        self.camera.add_bounding_box_2d_loose_to_frame()
        self.camera.add_bounding_box_2d_tight_to_frame()

        # Create the camera
        self.camera_persp = Camera(
            prim_path="/World/PerspCamera",  # Unique prim path
            frequency=20,
            resolution=(640, 480),
            position=perspective_position,
            orientation=perspective_orientation,
        )
        self.camera_persp.initialize()
        APERTURE_SIZE = 0.5
        self.camera_persp.set_focal_length(1.93)
        self.camera_persp.set_focus_distance(4)
        self.camera_persp.set_horizontal_aperture(
            APERTURE_SIZE
        )  # Set horizontal aperture
        self.camera_persp.set_vertical_aperture(
            APERTURE_SIZE
        )  # Set vertical aperture to the same value
        self.camera_persp.set_clipping_range(0.01, 10000)

        # Add necessary frame processing

        self.camera_persp.add_distance_to_camera_to_frame()
        self.camera_persp.add_instance_segmentation_to_frame()
        self.camera_persp.add_instance_id_segmentation_to_frame()
        self.camera_persp.add_semantic_segmentation_to_frame()
        self.camera_persp.add_bounding_box_2d_loose_to_frame()
        self.camera_persp.add_bounding_box_2d_tight_to_frame()
        return
    

def get_height_at_position(self, rgb_image: np.ndarray, distance_image: np.ndarray, camera: Camera, position: np.ndarray) -> float:
        """Get the height at a specific position using the depth camera.

        Args:
            camera (Camera): The camera object to capture images from.
            position (np.ndarray): The world coordinates (x, y) to measure height.

        Returns:
            float: The height at the specified position.
        """
        self.heightmap_resolution = 0.001

        # Get camera intrinsics and pose
        cam_intrinsics = camera.get_intrinsics_matrix()
        cam_position, cam_orientation = camera.get_local_pose()
        carb.log_warn(f"cam_position: {cam_position}, cam_orientation: {cam_orientation}")

        # Convert quaternion to rotation matrix
        rotation_matrix = R.from_quat(cam_orientation).as_matrix()

        # Construct the full transformation matrix
        cam_pose = np.eye(4)
        cam_pose[0:3, 0:3] = rotation_matrix
        cam_pose[0:3, 3] = cam_position
        # Compute the heightmap

        # Capture the current frame
        frame = camera.get_current_frame()

        # Extract RGB image
        rgb_image = frame["rgba"][:, :, :3]

        # Extract and process distance image to get depth image
        distance_image = frame["distance_to_camera"]

        _, depth_heightmap = get_heightmap(
            rgb_image,
            distance_image,
            cam_intrinsics,
            cam_pose,
            self.workspace_limits,
            self.heightmap_resolution,
        )

        # Convert the world position to heightmap pixel coordinates
        pixel_x = int(
            (position[0] - self.workspace_limits[0][0]) / self.heightmap_resolution
        )
        pixel_y = int(
            (position[1] - self.workspace_limits[1][0]) / self.heightmap_resolution
        )

        # Get the height value from the heightmap
        height = depth_heightmap[pixel_y, pixel_x]

        return height