4.5.0 Synthetic data generation - problem with annotation

Hello,

I try to generate synthetic data for a pose estimation model I plan to train.

I have build up my custom environment with a vehicle robot and a asset, both urdf files. Installed on the robot is a camera. I made a script wich is supposed to randomize the position of the asset, render an image from the perspective of the camera named after the randomized position, save a boundingbox of a mesh of the asset and add the informations to a single csv. The mesh prim has a sematic label “class a_frame_female”.

import omni.kit.commands
import pxr
import numpy as np
import carb
import os
import asyncio
import omni.replicator.core as rep
from omni.isaac.core.utils.stage import get_current_stage
from omni.isaac.core.utils.prims import get_prim_at_path
from pxr import Gf, Usd
import csv
import json
import time

class TrainingDataGenerator:
    def __init__(self):
        self.stage = get_current_stage()

    def randomize_implement_pose(self, implement_path, x_range, y_range, z_range, yaw_range):
        x = np.random.uniform(x_range[0], x_range[1])
        y = np.random.uniform(y_range[0], y_range[1])
        z = np.random.uniform(z_range[0], z_range[1])
        yaw_deg = float(np.random.uniform(yaw_range[0], yaw_range[1]))
        print(f"Set yaw: {yaw_deg}")

        omni.kit.commands.execute('ChangeProperty',
            prop_path=f"{implement_path}.xformOp:translate",
            value=pxr.Gf.Vec3d(x, y, z), prev=None)
        
        quat = Gf.Quatd(Gf.Rotation(Gf.Vec3d(0, 0, 1), yaw_deg).GetQuat())

        omni.kit.commands.execute('ChangeProperty',
            prop_path=f"{implement_path}.xformOp:orient",
            value=quat, prev=None)
        
        return x, y, z, yaw_deg


    def parse_replicator_annotations(self, output_dir, image_idx, target_class="a_frame_female"):
        try:
            labels_path = os.path.join(output_dir, f"bounding_box_2d_tight_labels_{image_idx}.json")
            bboxes_path = os.path.join(output_dir, f"bounding_box_2d_tight_{image_idx}.npy")
            if not os.path.exists(labels_path) or not os.path.exists(bboxes_path):
                print("Keine Annotationsdateien gefunden")
                return None
            with open(labels_path, "r") as f:
                labels = json.load(f)
            for k, v in labels.items():
                if v.get("class") == target_class:
                    bbox_idx = int(k)
                    break
            else:
                print(f"Keine Bounding Box für '{target_class}' gefunden!")
                return None
            bboxes = np.load(bboxes_path)
            bbox = bboxes[bbox_idx]
            print("Labels:", labels)
            print("Bounding Boxes:", bboxes)
            return {
                "x_min": int(bbox[0]),
                "y_min": int(bbox[1]),
                "x_max": int(bbox[0] + bbox[2]),
                "y_max": int(bbox[1] + bbox[3]),
                "width": int(bbox[2]),
                "height": int(bbox[3])
            }
            
        except Exception as e:
            print(f"Fehler beim Parsen der Replicator-Annotationen: {e}")
            return None

    def save_annotations_to_single_csv(self, csv_path, image_filename, bbox_data, pose_data):
        """
        Bounding Box und Pose Daten als Zeile in eine zentrale CSV speichern
        """
        file_exists = os.path.isfile(csv_path)
        with open(csv_path, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            # Header nur einmal schreiben
            if not file_exists:
                writer.writerow(['image', 'type', 'x_min', 'y_min', 'x_max', 'y_max', 'width', 'height', 'pose_x', 'pose_y', 'pose_z', 'pose_yaw'])
            writer.writerow([
                image_filename,
                'a_frame_female',
                bbox_data['x_min'], bbox_data['y_min'], 
                bbox_data['x_max'], bbox_data['y_max'],
                bbox_data['width'], bbox_data['height'],
                pose_data['x'], pose_data['y'], pose_data['z'], pose_data['yaw']
            ])
        print(f"Annotation hinzugefügt zu: {os.path.basename(csv_path)}")

    async def generate_training_data(self, config):
        
        stage = omni.usd.get_context().get_stage()
        root_layer = stage.GetRootLayer()
        stage.SetEditTarget(Usd.EditTarget(root_layer))
        
        print("Starte Trainingsdatengenerierung...")
        os.makedirs(config['output_dir'], exist_ok=True)

        annotations_csv_path = os.path.join(config['output_dir'], "annotations_all.csv")

        successful_images = 0

        camera_path = config['camera_path']  
        stage = omni.usd.get_context().get_stage()
        camera_prim = stage.GetPrimAtPath(camera_path)
        
        if not camera_prim:
            carb.log_error(f"Kamera nicht gefunden: {camera_path}")
            return        
        print("Current authoring layer:", stage.GetEditTarget().GetLayer().identifier)

        # ===== REPLICATOR WRITER MIT BOUNDING BOX =====
        render_product = rep.create.render_product(camera_path, resolution=(config['width'], config['height']))
        
        # Annotator für das Zielobjekt
        bb_annotator = rep.AnnotatorRegistry.get_annotator("bounding_box_2d_tight")
        bb_annotator.attach([config['boundingbox_prim_path']])
    
        writer = rep.WriterRegistry.get("BasicWriter")
        writer.initialize(
            output_dir=config['output_dir'], 
            rgb=True, 
            bounding_box_2d_tight=True,  
            frame_padding=0
        )
        
        writer.attach([render_product])
        # ==============================================

        for i in range(config['num_images']):
                print(f"Generiere Bild {i+1}/{config['num_images']}")
                
                # Pose randomisieren und Koordinaten erhalten
                x, y, z, yaw = self.randomize_implement_pose(config['implement_path'],
                                                        config['x_range'], config['y_range'], 
                                                        config['z_range'], config['yaw_range'])
                
                await omni.kit.app.get_app().next_update_async()
                await asyncio.sleep(2)

                # Vor dem Rendern: Merke die vorhandenen Dateien
                before_files = set(f for f in os.listdir(config['output_dir']) 
                                 if (f.startswith('rgb_') and f.endswith('.png')) or f.endswith('.json'))

                # Bild rendern
                await rep.orchestrator.step_async()

                # Warte, bis neue Dateien erscheinen (Bild + JSON)
                for _ in range(100):
                    await asyncio.sleep(0.1)
                    after_files = set(f for f in os.listdir(config['output_dir']) 
                                    if (f.startswith('rgb_') and f.endswith('.png')) or f.endswith('.json'))
                    new_files = after_files - before_files
                    new_images = [f for f in new_files if f.startswith('rgb_') and f.endswith('.png')]
                    if new_images:
                        break
                else:
                    print("Timeout: Kein neues Bild gefunden!")
                    continue

                # Neuestes Bild umbenennen
                latest_image = max(new_images, key=lambda x: os.path.getctime(os.path.join(config['output_dir'], x)))
                old_path = os.path.join(config['output_dir'], latest_image)
                filename = f"img_{i+1:04d}_x{x:.2f}_y{y:.2f}_z{z:.2f}_yaw{yaw:.1f}.png"
                output_path = os.path.join(config['output_dir'], filename)
                os.rename(old_path, output_path)
                print(f"Bild gespeichert als: {filename}")
                
                # ===== REPLICATOR BOUNDING BOX DATEN LESEN =====
                # Extrahiere Index aus Originalbildname
                image_idx = int(latest_image.split("_")[1].split(".")[0])

                # Warte auf Annotationsdateien
                labels_path = os.path.join(config['output_dir'], f"bounding_box_2d_tight_labels_{image_idx}.json")
                bboxes_path = os.path.join(config['output_dir'], f"bounding_box_2d_tight_{image_idx}.npy")
                for _ in range(100):
                    if os.path.exists(labels_path) and os.path.exists(bboxes_path):
                        break
                    time.sleep(0.1)

                bbox_data = self.parse_replicator_annotations(config['output_dir'], image_idx)
                
                if bbox_data:
                    pose_data = {'x': x, 'y': y, 'z': z, 'yaw': yaw}
                    self.save_annotations_to_single_csv(annotations_csv_path, filename, bbox_data, pose_data)
                    print(f"Bounding Box: x_min={bbox_data['x_min']}, y_min={bbox_data['y_min']}, x_max={bbox_data['x_max']}, y_max={bbox_data['y_max']}")
                else:
                    print("Warnung: Keine gültige Bounding Box von Replicator erhalten!")
                # ===============================================
                
                successful_images += 1
                print(f"Bild {i+1} erfolgreich generiert")

        print(f"Datengenerierung abgeschlossen! {successful_images}/{config['num_images']} Bilder erstellt.")
        print(f"Alle Annotationen gespeichert in: {annotations_csv_path}")

# Konfiguration
config = {
    'num_images': 10,
    'implement_path': '/Equipment/Anbaugrubber_2',  
    'boundingbox_prim_path': '/Equipment/Anbaugrubber_2/base_footprint/visuals/A_Frame_Kat2_65_female_collision_scaled0001/a_frame_kat2_65_female_collisio_002/a_frame_kat2_65_female_collisio_002',  
    'camera_path': '/Equipment/Kubota_M7/base_footprint/mainbody/Sensors/ZED_X/base_link/ZED_X/CameraLeft',
    'output_dir': 'C:/projects/ac_trainingdata/Trainingpic_with_annotators',
    'x_range': [-7.0, -2.0], 'y_range': [-3.0, 3.0], 'z_range': [1.2, 2.0], 'yaw_range': [-180, 180],
    'width': 960, 'height': 600,
}

async def main():
    generator = TrainingDataGenerator()
    await generator.generate_training_data(config)

if __name__ == "__main__":
    asyncio.ensure_future(main()) 

The problem is that the bbox is not right. first of all the x_min is allways 0 and secondly the size doesn’t fit at all. The meshprim is allways in the image!

This image shows one example, its image 8. It shows the bbox placed wrong in the left low corner. I draw a custom bbox in red where it should be:

here the annotation csv:

image,type,x_min,y_min,x_max,y_max,width,height,pose_x,pose_y,pose_z,pose_yaw
img_0001_x-5.23_y-0.04_z1.22_yaw144.4.png,a_frame_female,0,492,166,1011,166,519,-5.2262789766770705,-0.041770123002617954,1.223353787563733,144.3688125029891
img_0002_x-4.77_y2.49_z1.34_yaw-139.2.png,a_frame_female,0,624,171,1285,171,661,-4.774616805328546,2.494645375923291,1.3449903634473597,-139.23984049143613
img_0003_x-2.37_y-0.85_z1.82_yaw48.8.png,a_frame_female,0,358,273,811,273,453,-2.3749213312965853,-0.8464846164606397,1.815680176622795,48.81947435828229
img_0004_x-2.44_y-1.35_z1.54_yaw59.0.png,a_frame_female,0,313,294,710,294,397,-2.4393148555603457,-1.3521188715139132,1.5437081451220749,58.99371697579036
img_0005_x-3.98_y0.37_z1.72_yaw-72.4.png,a_frame_female,0,474,180,979,180,505,-3.98434880301739,0.37437918320288954,1.7172700750901244,-72.42913071387943
img_0006_x-2.64_y0.13_z1.52_yaw93.2.png,a_frame_female,0,530,263,1084,263,554,-2.643824313457184,0.12972335530323642,1.5163531264870538,93.22255921903474
img_0007_x-3.98_y0.09_z1.78_yaw161.0.png,a_frame_female,0,468,159,1002,159,534,-3.9780743885027503,0.09345817824766733,1.7755917870634086,160.97206576716883
img_0008_x-2.05_y1.27_z1.21_yaw-108.5.png,a_frame_female,0,586,334,1205,334,619,-2.045274352138234,1.2742930626083648,1.2093632418382978,-108.54815752888386
img_0009_x-6.38_y-1.69_z1.89_yaw-36.1.png,a_frame_female,0,339,103,718,103,379,-6.379375776524981,-1.6934346604558561,1.8946396872878548,-36.11865774691162
img_0010_x-6.62_y-2.49_z1.76_yaw-74.6.png,a_frame_female,0,297,106,612,106,315,-6.621270180936234,-2.4934980794077526,1.7606370932410693,-74.64619936240264

I opened the mesh merge tool in my stage and put the mesh prim as source. It shows it has apparently two submeshes. When I merge the same mesh with a scaling factor 1000 appears. I downscaled the usd mesh in blender before. The URDF file doesn’t scale! I wonder how I get rid of the submesh, it seems like it can be the problem.

I wonder what else could be the problem. I need to have a model wich finds the triangle mesh pose.