I created a synthetic dataset using Isaac Sim Replicator 2023.1.1, then trained a CenterPose model on it end to end with TAO Toolkit 5.2. I ran inference on a different machine with TAO Toolkit 5.3.
I am trying to calculate the 6D pose estimation error for a single frame from the test set.
I am confused about why the TAO CenterPose inference output has negative signs for some coordinates. I am also not sure what unit of measurement is used for the 3D coordinates.
Do you have scripts for single frame gt and prediction for 6D pose prediction you could share?
gt photo:
gt_overlay.png created by isaac sim 2023.1.1
test.png (created by TAO Toolkit inference 5.3)
gt.json
{
"camera_data": {
"camera_view_matrix": [
[
0.5768750423474394,
0.32294544758696864,
-0.7502808963311624,
0.0
],
[
-0.8168324096879604,
0.22807514312980523,
-0.5298740827495517,
0.0
],
[
-2.7755575615628907e-17,
0.9185248864179845,
0.3953631660016249,
0.0
],
[
4.255467142125352,
-1.556385344143473,
-2.5398180373150963,
1.0
]
],
"camera_projection_matrix": [
[
2.2906228624246854,
0.0,
0.0,
0.0
],
[
0.0,
3.054163816566247,
0.0,
0.0
],
[
0.0,
0.0,
1.0000001149011741e-07,
-1.0
],
[
0.0,
0.0,
0.10000001149011742,
0.0
]
],
"width": 1920,
"height": 1440,
"intrinsics": {
"fx": 2198.997860547374,
"fy": 2198.997860547374,
"cx": 960.0,
"cy": 720.0
}
},
"keypoint_order": [
"Center",
"LDB",
"LDF",
"LUB",
"LUF",
"RDB",
"RDF",
"RUB",
"RUF"
],
"objects": [
{
"class": "pallet",
"name": "pallet_0",
"prim_path": "/Replicator/Ref_Xform_05/Ref",
"visibility": 1.0,
"local_to_world_transform": [
[
0.009418974630534649,
0.003359005553647876,
0.0,
0.0
],
[
-0.003359005553647876,
0.009418974630534649,
0.0,
0.0
],
[
0.0,
0.0,
0.009999999776482582,
0.0
],
[
1.6102886199951172,
9.348337173461914,
0.0,
1.0
]
],
"location": [
1.6555986640624638,
9.347971757153855,
-0.02539817980545739
],
"quaternion_xyzw": [
0.6653366977907593,
-0.5241820951684338,
0.16598659013311393,
0.5049838230811959
],
"projected_cuboid": [
[
340,
443
],
[
402,
500
],
[
398,
468
],
[
117,
474
],
[
112,
443
],
[
545,
415
],
[
543,
386
],
[
288,
395
],
[
285,
367
]
],
"keypoints_3d": [
[
-2.4516223219977977,
1.0957733738480568,
-8.701428411498572
],
[
-2.0372158084077787,
0.8035359668953665,
-8.02248976948253
],
[
-2.0372158084077787,
0.9148205606951385,
-7.974589238987964
],
[
-3.1890517797345064,
0.9307161580500445,
-8.31796031575615
],
[
-3.1890517797345064,
1.0420007518498164,
-8.270059785261584
],
[
-1.7141932058571596,
1.2608310031799692,
-9.08489746798092
],
[
-1.7141932058571596,
1.3721155969797412,
-9.036996937486354
],
[
-2.866029177183888,
1.3880111943346471,
-9.38036801425454
],
[
-2.866029177183888,
1.499295788134419,
-9.332467483759974
]
],
"scale": [
1.2009048461914062,
1.195911169052124,
0.12115577608346939
]
}
]
}
and test.json
{
"image_name": "07066.png",
"objects": [
{
"id": "object_0",
"keypoints_2d": [
[
391.3300037384033,
495.6400680541992
],
[
389.79649543762207,
463.56679916381836
],
[
108.62447261810303,
461.75045013427734
],
[
104.42878246307373,
433.51037979125977
],
[
535.3099822998047,
407.2760581970215
],
[
533.280029296875,
379.82648849487305
],
[
278.2712173461914,
381.37189865112305
],
[
276.9055652618408,
355.17282485961914
]
],
"keypoints_3d": [
[
-2.0921055201064394,
-0.99727728577068,
7.299869909964816
],
[
-1.7463693862038498,
-0.6909399274121011,
6.761579255949828
],
[
-1.744560957151571,
-0.7831011793242648,
6.719805903324282
],
[
-2.709530897245373,
-0.8180121938709188,
7.000232111776869
],
[
-2.7077224681930945,
-0.9101734457830826,
6.958458759151324
],
[
-1.4764885720197842,
-1.0843811257582774,
7.641281060778307
],
[
-1.4746801429675056,
-1.1765423776704411,
7.599507708152762
],
[
-2.439650083061308,
-1.2114533922170951,
7.879933916605349
],
[
-2.437841654009029,
-1.3036146441292589,
7.8381605639798035
]
],
"location": [
-2.0921055201064394,
-0.99727728577068,
7.299869909964816
],
"projected_keypoints_2d": [
[
327.2431498146669,
422.2658465223455
],
[
392.046508867219,
495.2928177527464
],
[
389.10764573230074,
463.73697057614527
],
[
108.84927371112576,
463.03608287536866
],
[
104.31110097735609,
432.36885504684153
],
[
535.0980516003362,
407.9382191815982
],
[
533.2857174553109,
379.5550303126802
],
[
279.18397160275015,
381.92820322712964
],
[
276.06292857093615,
354.2705932062541
]
],
"quaternion_xyzw": [
0.6725826267716993,
-0.5040012438348432,
0.3333825054329878,
0.4271667841125237
],
"relative_scale": [
1.0007531642913818,
1.0003912448883057,
0.10120266675949097
]
}
]
}
I computed the reprojection error, but for this specific example it reports 107.11 pixels, which doesn’t seem right because the prediction is almost perfectly aligned with the ground truth.
def calculate_2d_error(test, gt):
    """
    Compute the average Euclidean distance between corresponding aligned 2D points.

    Args:
        test: Array-like of shape (N, 2) holding the predicted points.
        gt: Array-like of shape (N, 2) holding the ground-truth points, listed
            in the same order as ``test`` (row i of ``test`` is compared with
            row i of ``gt``).

    Returns:
        The mean of the per-row L2 distances, in the same unit as the inputs.

    Raises:
        ValueError: If ``test`` and ``gt`` do not have the same shape.
    """
    # Accept plain nested lists (e.g. straight from json.load) as well as arrays.
    test_points = np.asarray(test, dtype=float)
    gt_points = np.asarray(gt, dtype=float)

    # Refuse to broadcast: a shape mismatch means the point lists are not
    # aligned one-to-one, and any number computed from them is meaningless.
    if test_points.shape != gt_points.shape:
        raise ValueError(
            f"Point arrays must have the same shape, got "
            f"{test_points.shape} and {gt_points.shape}"
        )

    distances = np.linalg.norm(test_points - gt_points, axis=1)
    return np.mean(distances)
# Compare the keypoints index-for-index. The original slicing dropped index 3
# from the prediction but index 8 from the ground truth, which paired different
# cuboid corners with each other and inflated the reported error.
# NOTE(review): assumes 'projected_keypoints_2d' and 'projected_cuboid' list the
# same keypoints in the same order (center first, then the 8 corners) — confirm
# against the writer / inference output format.
pred_keypoints_2d = pred_obj['projected_keypoints_2d']
test_corners_2d = np.array(pred_keypoints_2d)
gt_keypoints_2d = np.array(gt_obj['projected_cuboid'])
error_2d = calculate_2d_error(test_corners_2d, gt_keypoints_2d)
for 3D coordinates error:
def transform_gt_data(gt_obj):
    """
    Transform ground truth 3D data from its native coordinate system to that used in test data.

    Observation:
    - The GT keypoints appear to have their y and z components negated
      relative to the test data.
    - We apply F = diag(1, -1, -1). Its determinant is +1, so it is a
      180-degree rotation about the x axis rather than a mirror reflection;
      this is why F @ R @ F is still a proper rotation matrix.

    Args:
        gt_obj: Mapping with a 'keypoints_3d' entry (array-like of 3D points,
            shape (N, 3) or a single point of shape (3,)) and a
            'quaternion_xyzw' entry (quaternion with the scalar component last).

    Returns:
        A tuple ``(quaternion, keypoints)`` of plain Python lists: the
        transformed quaternion in x, y, z, w order and the transformed
        keypoints. An empty 'keypoints_3d' yields an empty keypoint list.

    Raises:
        ValueError: If the keypoints are not 3-dimensional points.
    """
    F = np.diag([1, -1, -1])

    gt_keypoints = np.asarray(gt_obj['keypoints_3d'])
    if gt_keypoints.size == 0:
        # Nothing to transform; avoid an opaque matmul shape error.
        gt_keypoints_trans = []
    else:
        if gt_keypoints.ndim not in (1, 2) or gt_keypoints.shape[-1] != 3:
            raise ValueError(
                f"keypoints_3d must contain 3D points, got shape {gt_keypoints.shape}"
            )
        # F is symmetric, so K @ F equals (F @ K.T).T; this flips the sign of
        # y and z for every keypoint (including the center at index 0).
        gt_keypoints_trans = (gt_keypoints @ F).tolist()

    # Transform the GT rotation similarly: conjugating by F re-expresses the
    # rotation with the y and z axes negated.
    # NOTE(review): this assumes the GT quaternion is given in the same frame
    # as the GT keypoints; verify against the data writer's convention before
    # relying on the returned quaternion for a rotation-error metric.
    R_gt = Rotation.from_quat(gt_obj['quaternion_xyzw']).as_matrix()
    R_gt_trans = F @ R_gt @ F
    gt_quat_trans = Rotation.from_matrix(R_gt_trans).as_quat()

    return gt_quat_trans.tolist(), gt_keypoints_trans
# Pick the first ground-truth object that is actually visible. Comparing with
# `> 0` rejects NaN (NaN > 0 is False) as well as a visibility of 0, which the
# previous `not np.isnan(...)` test let through.
gt_obj = next(
    (obj for obj in gt_data['objects'] if obj.get('visibility', 0) > 0),
    None,
)
if gt_obj is None:
    raise ValueError("No visible object found in ground truth data")

# Bring the ground truth into the coordinate convention of the prediction.
gt_quat_trans, gt_keypoints_3d = transform_gt_data(gt_obj)
pred_keypoints_3d = pred_obj['keypoints_3d']

# NOTE(review): the ground truth carries an absolute 'scale' while the
# prediction reports a 'relative_scale'; confirm both keypoint sets are in the
# same unit and scale before interpreting the error as a metric distance.
test_corners_3d = np.array(pred_keypoints_3d)
gt_corners_3d = np.array(gt_keypoints_3d)
error_3d = calculate_3d_error(test_corners_3d, gt_corners_3d)
I get 3D Corner Error: 143.72 cm
I am not sure whether the unit in Isaac Sim is cm, mm, or m. I am also not sure whether my calculation is correct.
For the 3D keypoints, I see that the signs of some components are not the same in the two files.
Also, in the prediction file test.json I see both `keypoints_2d` and `projected_keypoints_2d`. I am not sure what `keypoints_2d` refers to, but I believe `projected_keypoints_2d` corresponds to `projected_cuboid` in gt.json. Could you please provide some clarification?
keypoints_3d in test.json:
"keypoints_3d": [
[
-2.0921055201064394,
-0.99727728577068,
7.299869909964816
],
[
-1.7463693862038498,
-0.6909399274121011,
6.761579255949828
],
[
-1.744560957151571,
-0.7831011793242648,
6.719805903324282
],
[
-2.709530897245373,
-0.8180121938709188,
7.000232111776869
],
[
-2.7077224681930945,
-0.9101734457830826,
6.958458759151324
],
[
-1.4764885720197842,
-1.0843811257582774,
7.641281060778307
],
[
-1.4746801429675056,
-1.1765423776704411,
7.599507708152762
],
[
-2.439650083061308,
-1.2114533922170951,
7.879933916605349
],
[
-2.437841654009029,
-1.3036146441292589,
7.8381605639798035
]
]
It seems the y and z components in test.json have the opposite sign compared to gt.json. Why is that, and how can I account for this when calculating the 6D pose estimation error in terms of translation and rotation?
and keypoints_3d in gt.json:
"keypoints_3d": [
[
-2.4516223219977977,
1.0957733738480568,
-8.701428411498572
],
[
-2.0372158084077787,
0.8035359668953665,
-8.02248976948253
],
[
-2.0372158084077787,
0.9148205606951385,
-7.974589238987964
],
[
-3.1890517797345064,
0.9307161580500445,
-8.31796031575615
],
[
-3.1890517797345064,
1.0420007518498164,
-8.270059785261584
],
[
-1.7141932058571596,
1.2608310031799692,
-9.08489746798092
],
[
-1.7141932058571596,
1.3721155969797412,
-9.036996937486354
],
[
-2.866029177183888,
1.3880111943346471,
-9.38036801425454
],
[
-2.866029177183888,
1.499295788134419,
-9.332467483759974
]
]
Note: For any Isaac Lab topics, please submit your topic to its GitHub repo (GitHub - isaac-sim/IsaacLab: Unified framework for robot learning built on NVIDIA Isaac Sim) following the instructions provided on Isaac Lab’s Contributing Guidelines (Contribution Guidelines — Isaac Lab Documentation).
Please provide all relevant details below before submitting your post. This will help the community provide more accurate and timely assistance. After submitting, you can check the appropriate boxes. Remember, you can always edit your post later to include additional information if needed.
Isaac Sim Version
4.5.0
4.2.0
4.1.0
4.0.0
4.5.0
2023.1.1
2023.1.0-hotfix.1
Other (please specify):
Operating System
Ubuntu 22.04
Ubuntu 20.04
Windows 11
Windows 10
Other (please specify):
GPU Information
- Model: NVIDIA RTX 6000 Ada
- Driver Version: Not sure what the driver version was at time of data creation (updated one: Driver Version: 565.57.01 CUDA Version: 12.7)
Topic Description
Detailed Description
(Describe the issue in detail, including what you were trying to do, what you expected to happen, and what actually happened)
Steps to Reproduce
(Add more steps as needed)
Error Messages
(If applicable, copy and paste any error messages you received)
Screenshots or Videos
(If applicable, add screenshots or links to videos that demonstrate the issue)
Additional Information
What I’ve Tried
(Describe any troubleshooting steps you’ve already taken)
Related Issues
(If you’re aware of any related issues or forum posts, please link them here)
Additional Context
(Add any other context about the problem here)