GPU Memory Overload in Kit After Synthetic Data Generation (SDG)

Hello everyone,

I’m having trouble using the new Omniverse Kit version 106.0.2 with Replicator 1.11.18. After I execute my domain randomization script, the GPU memory overloads: usage increases steadily, about 1 GB at a time, until it exceeds the maximum available on my GPU.
If I remember correctly, it started to happen after I switched from Omniverse Code to Kit and the newer Replicator version. The only thing I changed in the code was switching from the BasicWriter to the CocoWriter.

Any tips on solving this? Could my python code be the problem?

import datetime
import os

import carb
import numpy as np
import omni.kit.commands
import omni.replicator.core as rep
import omni.usd
from pxr import Sdf, Usd, Gf

# Configuration constants: asset locations, component identity, frame count
COMPONENT_NAME = "Napf_1108"
NUM_FRAMES = 50
BASE_PATH = "D:/USERS/xy/Omniverse/Assets"
COMPONENTS_PATH = os.path.join(BASE_PATH, "bauteile")
COMPONENT_PATH = os.path.join(COMPONENTS_PATH, f"{COMPONENT_NAME}.usd")
RENDERED_PATH = os.path.join(BASE_PATH, "MA_Omniverse", "data", "rendered")

# Table coordinates (X/Y extent and a single Z height)
X_MIN = -225.6
X_MAX = -198
Y_MIN = -266
Y_MAX = -83.7
Z_HEIGHT = 98

# Midpoint of the table extent; used as the shared camera look-at target
X_MID = (X_MIN + X_MAX) / 2
Y_MID = (Y_MIN + Y_MAX) / 2
TABLE_ORIGIN = (X_MID, Y_MID, Z_HEIGHT)

# Camera movement ranges: one (min, max) pair per axis for each camera
FRONT_CAM_X_MIN = 0
FRONT_CAM_X_MAX = 30
FRONT_CAM_Y_MIN = -200
FRONT_CAM_Y_MAX = -150
FRONT_CAM_Z_MIN = 110
FRONT_CAM_Z_MAX = 150

RIGHT_CAM_X_MIN = X_MIN
RIGHT_CAM_X_MAX = X_MAX + 30
RIGHT_CAM_Y_MIN = Y_MIN - 30
RIGHT_CAM_Y_MAX = Y_MIN - 10
RIGHT_CAM_Z_MIN = Z_HEIGHT + 20
RIGHT_CAM_Z_MAX = Z_HEIGHT + 100

LEFT_CAM_X_MIN = X_MIN
LEFT_CAM_X_MAX = X_MAX
LEFT_CAM_Y_MIN = Y_MAX + 30
LEFT_CAM_Y_MAX = Y_MAX + 70
LEFT_CAM_Z_MIN = Z_HEIGHT + 20
LEFT_CAM_Z_MAX = Z_HEIGHT + 100

TOP_CAM_X_MIN = X_MIN
TOP_CAM_X_MAX = X_MAX
TOP_CAM_Y_MIN = -216
TOP_CAM_Y_MAX = -158
TOP_CAM_Z_MIN = Z_HEIGHT + 100
TOP_CAM_Z_MAX = Z_HEIGHT + 150

# Light parameters: randomization ranges and creation-time defaults
LIGHT_Z_HEIGHT = Z_HEIGHT + 200
LIGHT_INTENSITY_MIN = 70000
LIGHT_INTENSITY_MAX = 400000
LIGHT_TEMPERATURE_MIN = 4000
LIGHT_TEMPERATURE_MAX = 10000
LIGHT_INTENSITY_DEFAULT = 30000
LIGHT_TEMPERATURE_DEFAULT = 8000

# Per-light switch, keyed "light_1" .. "light_6": True enables, False disables
enabled_lights = {f"light_{number}": True for number in range(1, 7)}

# Shader input name -> (min, max) range used when randomizing the material
MATERIAL_PROPERTIES = {
    "inputs:ground_coat_influence": (0.0, 1.0),
    "inputs:ground_coat_brightness": (0.0, 1.0),
    "inputs:edge_darkening": (0.0, 1.0),
    "inputs:flakes_roughness": (0.0, 1.0),
}

def translate_coordinates(x_offset=0, y_offset=0, z_offset=0):
    """Convert offsets relative to the table midpoint into absolute coordinates.

    Each offset is added to the matching component of
    (X_MID, Y_MID, Z_HEIGHT); all offsets default to 0.

    Returns:
        A 3-tuple (x, y, z).
    """
    return (X_MID + x_offset, Y_MID + y_offset, Z_HEIGHT + z_offset)

def randomize_component():
    """Create the component from COMPONENT_PATH and randomize its position.

    The position is drawn with rep.distribution.uniform between the
    (X_MIN, Y_MIN) and (X_MAX, Y_MAX) corners at the fixed height Z_HEIGHT;
    rotation and scale are set to fixed values.

    Returns:
        The Replicator item returned by rep.create.from_usd.
    """
    # NOTE(review): setup_scene() also calls rep.create.from_usd on the same
    # file, so this may add a second copy of the component - confirm intent.
    lower_corner = (X_MIN, Y_MIN, Z_HEIGHT)
    upper_corner = (X_MAX, Y_MAX, Z_HEIGHT)
    component_semantics = [("class", COMPONENT_NAME)]

    instances = rep.create.from_usd(COMPONENT_PATH, semantics=component_semantics)
    with instances:
        random_position = rep.distribution.uniform(lower_corner, upper_corner)
        rep.modify.pose(position=random_position, rotation=(0, 0, 0), scale=(1, 1, 1))
    return instances

def randomize_front_camera(camera):
    """Randomize the front camera's position while aiming it at TABLE_ORIGIN.

    The position is drawn with rep.distribution.uniform between the
    FRONT_CAM_* minimum and maximum corners.

    Args:
        camera: Replicator camera item to modify (used as a context manager).

    Returns:
        The same ``camera`` object that was passed in.
    """
    lower_corner = (FRONT_CAM_X_MIN, FRONT_CAM_Y_MIN, FRONT_CAM_Z_MIN)
    upper_corner = (FRONT_CAM_X_MAX, FRONT_CAM_Y_MAX, FRONT_CAM_Z_MAX)
    with camera:
        random_position = rep.distribution.uniform(lower_corner, upper_corner)
        rep.modify.pose(position=random_position, look_at=TABLE_ORIGIN)
    return camera

def randomize_right_camera(camera):
    """Randomize the right camera's position while aiming it at TABLE_ORIGIN.

    The position is drawn with rep.distribution.uniform between the
    RIGHT_CAM_* minimum and maximum corners.

    Args:
        camera: Replicator camera item to modify (used as a context manager).

    Returns:
        The same ``camera`` object that was passed in.
    """
    lower_corner = (RIGHT_CAM_X_MIN, RIGHT_CAM_Y_MIN, RIGHT_CAM_Z_MIN)
    upper_corner = (RIGHT_CAM_X_MAX, RIGHT_CAM_Y_MAX, RIGHT_CAM_Z_MAX)
    with camera:
        random_position = rep.distribution.uniform(lower_corner, upper_corner)
        rep.modify.pose(position=random_position, look_at=TABLE_ORIGIN)
    return camera

def randomize_left_camera(camera):
    """Randomize the left camera's position while aiming it at TABLE_ORIGIN.

    The position is drawn with rep.distribution.uniform between the
    LEFT_CAM_* minimum and maximum corners.

    Args:
        camera: Replicator camera item to modify (used as a context manager).

    Returns:
        The same ``camera`` object that was passed in.
    """
    lower_corner = (LEFT_CAM_X_MIN, LEFT_CAM_Y_MIN, LEFT_CAM_Z_MIN)
    upper_corner = (LEFT_CAM_X_MAX, LEFT_CAM_Y_MAX, LEFT_CAM_Z_MAX)
    with camera:
        random_position = rep.distribution.uniform(lower_corner, upper_corner)
        rep.modify.pose(position=random_position, look_at=TABLE_ORIGIN)
    return camera

def randomize_top_camera(camera):
    """Randomize the top camera's position while aiming it at TABLE_ORIGIN.

    The position is drawn with rep.distribution.uniform between the
    TOP_CAM_* minimum and maximum corners.

    Args:
        camera: Replicator camera item to modify (used as a context manager).

    Returns:
        The same ``camera`` object that was passed in.
    """
    lower_corner = (TOP_CAM_X_MIN, TOP_CAM_Y_MIN, TOP_CAM_Z_MIN)
    upper_corner = (TOP_CAM_X_MAX, TOP_CAM_Y_MAX, TOP_CAM_Z_MAX)
    with camera:
        random_position = rep.distribution.uniform(lower_corner, upper_corner)
        rep.modify.pose(position=random_position, look_at=TABLE_ORIGIN)
    return camera

def randomize_light(light):
    """Randomize the "intensity" and "temperature" attributes of a light.

    Each attribute is modified with a value from rep.distribution.uniform
    over its LIGHT_*_MIN / LIGHT_*_MAX range.

    Args:
        light: Replicator light item to modify (used as a context manager).

    Returns:
        The same ``light`` object that was passed in.
    """
    # Intensity first, then temperature: same call order as before.
    attribute_ranges = (
        ("intensity", LIGHT_INTENSITY_MIN, LIGHT_INTENSITY_MAX),
        ("temperature", LIGHT_TEMPERATURE_MIN, LIGHT_TEMPERATURE_MAX),
    )
    with light:
        for attribute_name, lowest, highest in attribute_ranges:
            rep.modify.attribute(attribute_name, rep.distribution.uniform(lowest, highest))
    return light

def setup_lights(intensity=LIGHT_INTENSITY_DEFAULT, temperature=LIGHT_TEMPERATURE_DEFAULT):
    """Create a cylinder light for every entry enabled in ``enabled_lights``.

    Lights are named "light_1" .. "light_6" by their position in the
    hard-coded position table; a name missing from ``enabled_lights`` is
    treated as disabled. A message is printed for every slot.

    Args:
        intensity: Intensity passed to every created light.
        temperature: Temperature passed to every created light.

    Returns:
        List of the created Replicator light items, in slot order.
    """
    # NOTE(review): LIGHT_Z_HEIGHT is defined at module level, yet the
    # positions below hard-code z=400 - confirm which value is intended.
    light_positions = (
        (-268, -215, 400),
        (-268, -380, 400),
        (200, -380, 400),
        (200, -215, 400),
        (500, -380, 400),
        (500, -215, 400),
    )

    created_lights = []
    for slot, position in enumerate(light_positions, start=1):
        light_name = f"light_{slot}"
        if not enabled_lights.get(light_name, False):
            print(f"{light_name} is disabled.")
            continue

        created_lights.append(
            rep.create.light(
                light_type="cylinder",
                position=position,
                rotation=(0, 0, 90),
                scale=(324, 10, 4),
                intensity=intensity,
                temperature=temperature,
            )
        )
        print(f"{light_name} created at position {position}")
    return created_lights

def randomize_material_properties():
    """Assign a random value to every shader input in MATERIAL_PROPERTIES.

    Looks up the shader prim at a fixed stage path and, for each
    (property name -> (min, max)) entry, draws a value with
    np.random.uniform and applies it through the "ChangeProperty" Kit
    command, passing the previous value as ``prev``.

    If no stage is open or the shader prim does not exist, a message is
    printed and nothing is changed. Properties missing on the shader are
    reported and skipped individually.
    """
    stage = omni.usd.get_context().get_stage()
    material_path = Sdf.Path("/Replicator/Ref_Xform/Ref/Looks/Gunmetal_Matte_Metallic/Shader")

    if stage is None:
        print("No USD stage is open; skipping material randomization.")
        return

    # Resolve the prim once; the path does not change between properties.
    shader_prim = stage.GetPrimAtPath(material_path)
    if not shader_prim.IsValid():
        # Avoid calling GetAttribute on an invalid prim.
        print(f"Shader prim '{material_path}' does not exist; skipping material randomization.")
        return

    for property_name, (min_value, max_value) in MATERIAL_PROPERTIES.items():
        material_property = shader_prim.GetAttribute(property_name)
        if not material_property:
            print(f"Property '{property_name}' does not exist on material at '{material_path}'")
            continue
        new_value = np.random.uniform(min_value, max_value)
        current_value = material_property.Get()
        omni.kit.commands.execute(
            "ChangeProperty",
            prop_path=material_property.GetPath(),
            value=new_value,
            prev=current_value
        )
        print(f"Changed '{property_name}' from {current_value} to {new_value}")

# Register randomizer functions so they can be called as rep.randomizer.<name>
for _randomizer_fn in (
    randomize_component,
    randomize_front_camera,
    randomize_right_camera,
    randomize_left_camera,
    randomize_top_camera,
    randomize_light,
):
    rep.randomizer.register(_randomizer_fn)

def setup_scene():
    """Build the Replicator scene, attach a COCO writer and start generation.

    Inside a new Replicator layer this creates four cameras with one 640x480
    render product each, the enabled lights, and the component posed at
    TABLE_ORIGIN. A "CocoWriter" writing to a timestamped folder under
    RENDERED_PATH is attached to all render products, the randomizers are
    placed under an on-frame trigger for NUM_FRAMES frames, and the
    orchestrator is started. An exception raised by the orchestrator call is
    logged through carb instead of being propagated.
    """
    with rep.new_layer():
        rep.settings.set_stage_up_axis("Z")

        print("Setting up the cameras...")
        # Initial camera offsets relative to the table midpoint, in the
        # order front, right, left, top.
        camera_offsets = (
            (80, -5, 30),
            (0, 40, 10),
            (0, -40, 10),
            (0, 0, 50),
        )
        cameras = [
            rep.create.camera(position=translate_coordinates(*offset), look_at=TABLE_ORIGIN)
            for offset in camera_offsets
        ]
        front_camera, right_camera, left_camera, top_camera = cameras

        # One render product per camera, created after all cameras exist.
        render_products = [
            rep.create.render_product(camera, resolution=(640, 480))
            for camera in cameras
        ]
        print("Cameras setup complete.")

        print("Setting up the lights...")
        lights = setup_lights()
        print("Lights setup complete.")

        print("Setting up the component...")
        component = rep.create.from_usd(COMPONENT_PATH, semantics=[("class", COMPONENT_NAME)])
        with component:
            rep.modify.pose(position=TABLE_ORIGIN, rotation=(0, 0, 0), scale=(1, 1, 1))
        print("Component setup complete.")

        print("Initializing the writer...")
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        output_dir = os.path.join(RENDERED_PATH, f"{COMPONENT_NAME}_{timestamp}")
        writer = rep.WriterRegistry.get("CocoWriter")
        # Single COCO category describing the component.
        component_category = {"name": COMPONENT_NAME, "id": 1, "supercategory": "bowl"}
        coco_categories = {COMPONENT_NAME: component_category}
        writer.initialize(
            output_dir=output_dir,
            semantic_types=["class"],
            rgb=True,
            bounding_box_2d_tight=True,
            image_output_format="png",
            coco_categories=coco_categories,
        )
        writer.attach(render_products)
        print(f"Writer initialized. Output will be saved to: {output_dir}")

        rep.settings.set_render_rtx_realtime()

        print("Setting up randomization trigger...")
        with rep.trigger.on_frame(num_frames=NUM_FRAMES):
            rep.randomizer.randomize_component()
            rep.randomizer.randomize_front_camera(front_camera)
            rep.randomizer.randomize_right_camera(right_camera)
            rep.randomizer.randomize_left_camera(left_camera)
            rep.randomizer.randomize_top_camera(top_camera)
            for light in lights:
                rep.randomizer.randomize_light(light)
            # NOTE(review): this is a plain Python call, not a registered
            # randomizer, so it runs when this block executes - confirm
            # whether a per-frame material randomization was intended.
            randomize_material_properties()
        print(f"Randomization trigger set up for {NUM_FRAMES} frames.")

        print("Starting the simulation...")
        try:
            rep.orchestrator.run()
            # NOTE(review): printed as soon as run() returns - confirm that
            # run() blocks until all frames have been written.
            print(f"Simulation completed successfully. Images saved in: {output_dir}")
        except Exception as error:
            carb.log_error(f"An error occurred during simulation: {error}")

# Entry point: build the scene and start generation only when this file is
# executed with __name__ set to "__main__".
if __name__ == "__main__":
    setup_scene()
    print("Script execution finished.")
1 Like

Yeah, this. All of my replicator code that used to work fine, now blows out the GPU memory and then just hangs. Code 2022 isn’t even available now. So I’m dead in the water.

I’m running on an RTX 4090 with 24 GB VRAM.

Here’s the identical Replicator code on Isaac, after drawing and waiting a minute

And here with the latest Kit 106, FFW 2 min to see memory leaking away

1 Like

Kit 106.1.0 beta fixed this for me. FYI

For me the update stops crashing my PC completely, but it still lags to the point where I have to restart Kit.
After executing the script once, the FPS shown in the HUD drops to roughly 1, which makes Kit unusable. I always have to restart, and I can’t work out why it behaves like that.

Interesting, I hadn’t really tried to generate more than three frames. Funny, because I did a THOROUGH analysis a while back on frame rate vs. frame number and saw that the FPS is a direct function of the frame number, i.e. it got slower every frame. I posted the whole analysis here. I’d dig it up, but the punchline is that I restarted my headless frame generator every 100 frames, and that was my workaround.

Another thing is, on my other computer Kit 106.1.0 crashes upon launch, period. But I just found 106.0.2 does too. So I’ll just do another complete scrub of OV and start over from scratch.
image

But at least the renders are not getting slower each frame. (on the one computer that doesn’t crash on launch).
image