I have two requests:
-
visualization of tight 2 bounding box in fruitbox demo
-
visualization of 3D bounding box in fruitbox demo
Further, how can I save the camera intrinsic matrix (K) when running the script using BasicWriter?
I believe not having access to K is what viz of 2 is not working.
With #1, problem is the 2D bounding box is not visualized where it should be.
import numpy as np
import matplotlib.pyplot as plt
# Load the image and tight 2D bounding box
img = plt.imread('/hdd/SDG_out/fruit_box/2023-07-13_11:38:31/rgb_0070.png')
bb2d_tight = np.load('/hdd/SDG_out/fruit_box/2023-07-13_11:38:31/bounding_box_2d_tight_0070.npy')
# Extract the tight 2D bounding box coordinates
x_min = bb2d_tight['x_min'][0]
y_min = bb2d_tight['y_min'][0]
x_max = bb2d_tight['x_max'][0]
y_max = bb2d_tight['y_max'][0]
# Flip the y coordinate to match the Matplotlib coordinate system
height, width, _ = img.shape
y_min = height - y_min
y_max = height - y_max
# Define the bounding box rectangle
rect = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, linewidth=2, edgecolor='r', facecolor='none')
# Plot the image and bounding box
fig, ax = plt.subplots()
ax.imshow(img)
ax.add_patch(rect)
plt.show()
as for 3d projected bounding box on the 2D image, I do need to have K. Here’s the code with a dummy K:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import cloudpickle
# Load the RGB image and the 2D and 3D bounding boxes
img = plt.imread('/hdd/SDG_out/fruit_box/2023-07-13_11:38:31/rgb_0070.png')
bb2d_tight = np.load('/hdd/SDG_out/fruit_box/2023-07-13_11:38:31/bounding_box_2d_tight_0070.npy', allow_pickle=True)
bb3d = np.load('/hdd/SDG_out/fruit_box/2023-07-13_11:38:31/bounding_box_3d_0070.npy', allow_pickle=True)
K = np.array([[1066.778, 0.0, 312.9868],
[0.0, 1067.487, 241.3109],
[0.0, 0.0, 1.0]])
print('K = {}'.format(K))
x_min = bb3d['x_min']
y_min = bb3d['y_min']
z_min = bb3d['z_min']
x_max = bb3d['x_max']
y_max = bb3d['y_max']
z_max = bb3d['z_max']
transform = bb3d['transform']
print('transform: ', transform)
print('transform.shape: ', transform.shape)
R = transform[0, :3, :3]
T = transform[0, :3, 3]
print('T: ', T)
print('R: ', R)
# Define the camera extrinsic matrix
RT = np.hstack((R, T.reshape(-1, 1)))
# Define the 3D bounding box vertices
vertices = np.array([[x_min, y_min, z_min],
[x_min, y_max, z_min],
[x_max, y_max, z_min],
[x_max, y_min, z_min],
[x_min, y_min, z_max],
[x_min, y_max, z_max],
[x_max, y_max, z_max],
[x_max, y_min, z_max]])
# Project the 3D bounding box vertices onto the 2D image plane
rvec, _ = cv2.Rodrigues(R)
points_2d, _ = cv2.projectPoints(vertices, rvec, T, K, np.zeros(5))
# Draw the projected bounding box on the RGB image
img = cv2.drawContours(img, [np.int32(points_2d[:4, :2])], -1, (0, 255, 0), 2)
img = cv2.drawContours(img, [np.int32(points_2d[4:, :2])], -1, (0, 255, 0), 2)
for i in range(4):
img = cv2.line(img, tuple(np.int32(points_2d[i, 0])), tuple(np.int32(points_2d[i+4, 0])), (0, 255, 0), 2)
# Display the image
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
can you please help figure how to access K, fix the visualization of tight 2D bounding box, as well as the 3D projected bounding box? I think I have an overall idea but need some guidance.