Orbit multiple cameras (per env) performances optimization

Hello,
I’m planning to train skills using the latest orbit(+isaacsim) RL framework. For this I need to add cameras into the scene and then train using those visual tensor data.

As a dummy test, I created a simple test code (See below) to validate the fact that I can retrieve visual data as expected.
And it works indeed!

My concerns are the following:

  • it seems rather slow: around 10 FPS with 2 cameras and 24 environments (num_envs)
  • it gets OOM if I try more than 32 envs on my RTX A6000 with 50G GPU mem
  • The allocated memory seems kinda fixed. The resolution of the camera almost has no impact on the GPU footprint nor the speed :-(
  • the reconstructed (from tensors) rgb images are ugly

Of course when I remove the cameras (num_cams=0), it goes much faster and seems to consume very little mem (managed to run 8096 envs) as expected.

Could you double check my code below and possibly point me towards optimizations to speed up the process?
Should the scene be degraded? Less effects?
Is the render really deactivated when running in headless mode?
Other tricks?

Thanx.

=====================================================================

from __future__ import annotations

import argparse

from omni.isaac.orbit.app import AppLauncher



# CLI definition: environment count, per-env camera count, and optional image dumping.
parser = argparse.ArgumentParser(description="Tutorial on adding sensors on a robot.")
parser.add_argument("--num_envs", type=int, default=16, help="Number of environments to spawn.")
parser.add_argument("--num_cams", type=int, default=2, help="Number of cams per env (2 Max)")
parser.add_argument("--save", action="store_true", default=False, help="Save the obtained data to disk.")

# let AppLauncher register its own options, then parse everything at once
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()
# clamp user input to the supported ranges: 0..2 cameras, at least 2 environments
args_cli.num_cams = max(0, min(2, args_cli.num_cams))
args_cli.num_envs = max(2, args_cli.num_envs)

# spin up the omniverse application
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app


import torch
import traceback

import carb
import os
import omni.isaac.orbit.sim as sim_utils
from omni.isaac.orbit.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.orbit.scene import InteractiveScene, InteractiveSceneCfg
from omni.isaac.orbit.sensors import CameraCfg, ContactSensorCfg, RayCasterCfg, patterns
from omni.isaac.orbit.utils import configclass
from omni.isaac.orbit.utils.timer import Timer
import omni.replicator.core as rep
from omni.isaac.orbit.utils import convert_dict_to_backend
from tqdm import tqdm


from omni.isaac.orbit_assets.anymal import ANYMAL_C_CFG


@configclass
class SensorsSceneCfg(InteractiveSceneCfg):
    """Design the scene with sensors on the robot.

    Camera attributes are added conditionally at class-definition time based on
    the ``--num_cams`` CLI argument, so the scene contains 0, 1, or 2 cameras
    per environment.
    """

    # ground plane
    ground = AssetBaseCfg(prim_path="/World/defaultGroundPlane", spawn=sim_utils.GroundPlaneCfg())

    # lights
    dome_light = AssetBaseCfg(
        prim_path="/World/Light", spawn=sim_utils.DomeLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75))
    )

    # robot: one ANYmal-C per environment (prim path expanded per env by the regex namespace)
    robot: ArticulationCfg = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")

    # NOTE: these `if` statements run once when the class body is executed, so the
    # presence of the camera attributes depends on the CLI value of --num_cams.
    if args_cli.num_cams > 0:
        # sensors: forward-facing camera mounted on the robot base.
        # update_period=0.1 caps the sensor refresh at 10 Hz regardless of sim rate.
        camera_1 = CameraCfg(
            prim_path="{ENV_REGEX_NS}/Robot/base/front_cam",
            update_period=0.1,
            height=192,
            width=240,
            data_types=["rgb"],#, "distance_to_image_plane"],
            spawn=sim_utils.PinholeCameraCfg(
                focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
            ),
            offset=CameraCfg.OffsetCfg(pos=(0.510, 0.0, 0.015), rot=(0.5, -0.5, 0.5, -0.5), convention="ros"),
        )

    if args_cli.num_cams > 1:
        # second camera mounted on the robot base (opposite orientation).
        camera_2 = CameraCfg(
            prim_path="{ENV_REGEX_NS}/Robot/base/rear_cam",
            update_period=0.1,
            height=192,
            width=240,
            data_types=["rgb"],#, "distance_to_image_plane"],
            spawn=sim_utils.PinholeCameraCfg(
                focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
            ),
            offset=CameraCfg.OffsetCfg(pos=(0.310, 0.50, 0.015), rot=(-0.5, 0.5, -0.5, 0.5), convention="ros"),
        )


def run_simulator(
    sim: sim_utils.SimulationContext,
    scene: InteractiveScene,
):
    """Run the simulation loop, optionally saving per-env camera frames to disk.

    Args:
        sim: The simulation context used for stepping physics.
        scene: The interactive scene holding the robot and camera sensors.
    """
    # Simulation stepping parameters.
    sim_dt = sim.get_physics_dt()
    sim_time = 0.0

    def reset():
        """Reset all robots to their default state at their env origins."""
        # Root states are expressed in the simulation world frame, so offset by
        # each environment's origin; otherwise all robots spawn at (0, 0, 0).
        root_state = scene["robot"].data.default_root_state.clone()
        root_state[:, :3] += scene.env_origins
        scene["robot"].write_root_state_to_sim(root_state)
        # Set joint positions with some noise.
        joint_pos, joint_vel = (
            scene["robot"].data.default_joint_pos.clone(),
            scene["robot"].data.default_joint_vel.clone(),
        )
        joint_pos += torch.rand_like(joint_pos) * 0.1
        scene["robot"].write_joint_state_to_sim(joint_pos, joint_vel)
        # Clear internal buffers.
        scene.reset()
        print("[INFO]: Resetting robot state...")

    # Replicator writer used to dump camera frames next to this script.
    output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera")
    rep_writer = rep.BasicWriter(output_dir=output_dir, frame_padding=3)

    episode_steps = 100
    while simulation_app.is_running():
        reset()

        with Timer(f"Time taken for {episode_steps} steps with {args_cli.num_envs} envs"):
            with tqdm(range(episode_steps * args_cli.num_envs)) as pbar:
                for count in range(episode_steps):
                    # Hold the default joint configuration as the position target.
                    targets = scene["robot"].data.default_joint_pos
                    scene["robot"].set_joint_position_target(targets)
                    # Write commands, step physics, then refresh sensor buffers.
                    scene.write_data_to_sim()
                    sim.step()
                    sim_time += sim_dt
                    scene.update(sim_dt)
                    pbar.update(args_cli.num_envs)

                    # Extract camera data.
                    if args_cli.save:
                        for j in range(args_cli.num_cams):
                            # Convert the camera output to numpy ONCE per camera.
                            # (The original code redid this conversion inside the
                            # per-env loop, i.e. num_envs redundant GPU->CPU
                            # copies per camera per step.)
                            single_cam_data = convert_dict_to_backend(
                                scene[f"camera_{j + 1}"].data.output, backend="numpy"
                            )
                            for i in range(args_cli.num_envs):
                                # Pack data back into replicator format to save it with its writer.
                                rep_output = {key: data[i] for key, data in single_cam_data.items()}
                                # Note: We need to provide on-time data for Replicator to save the
                                # images; keep the 1-based step index the original filenames used.
                                rep_output["trigger_outputs"] = {"on_time": f"{count + 1}_{i}_{j}"}
                                rep_writer.write(rep_output)

                    # Touch the camera tensors so the data-access path is
                    # exercised even when not saving to disk.
                    if args_cli.num_cams > 0:
                        cam1_rgb = scene["camera_1"].data.output["rgb"]
                    if args_cli.num_cams > 1:
                        cam2_rgb = scene["camera_2"].data.output["rgb"]
                    
                
def main():
    """Build the simulation and scene from CLI args, then run the loop."""
    # Simulation context with a 5 ms physics step.
    simulation = sim_utils.SimulationContext(sim_utils.SimulationCfg(dt=0.005, substeps=1))
    # Position the viewer camera.
    simulation.set_camera_view(eye=[3.5, 3.5, 3.5], target=[0.0, 0.0, 0.0])
    # Build the interactive scene (robots + optional cameras) for all envs.
    interactive_scene = InteractiveScene(SensorsSceneCfg(num_envs=args_cli.num_envs, env_spacing=2.0))
    # Start playing the simulator.
    simulation.reset()
    print("[INFO]: Setup complete...")
    # Hand off to the main loop.
    run_simulator(simulation, interactive_scene)


if __name__ == "__main__":
    try:
        # Run the main execution.
        main()
    except Exception as exc:
        # Log through carb so the error shows up in the omniverse console,
        # then re-raise so the process exits non-zero.
        carb.log_error(exc)
        carb.log_error(traceback.format_exc())
        raise
    finally:
        # Always shut the simulation app down, even on failure.
        simulation_app.close()

Anybody?

Still stuck at the moment.
Have to say it’s rather disappointing. This memory issue makes orbit impossible to use for image based RL. I’m still hoping though I’m somehow not using the framework properly.

Any help would be greatly appreciated!

. . . . . Up

Hi @mike.niemaz - I have reached out to Dev to prioritize answering this question. Sorry for the delay.

1 Like

Hello, sorry to bother, can you give us an update?
Do you foresee a fix sometime soon?

You say 10 fps is disappointing, but you also set the update_period = 0.1 .
Isn’t it providing the fps you requested?

Good point. But it does not matter actually, somehow …

And it’s more the memory limitation — and thus the maximum number of environments that can be run in parallel — that is the issue here.