yeah, Orbit is the new version of legged_gym. Here is a code sample from the Orbit standalone examples; it first loads a .pth checkpoint, exports it to ONNX, and then runs the inference loop:
import os

import torch
from rsl_rl.runners import OnPolicyRunner

# import path follows the Orbit (pre-Isaac Lab) package layout
from omni.isaac.orbit_tasks.utils.wrappers.rsl_rl import export_policy_as_onnx

# load the previously trained model
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)
print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# obtain the trained policy for inference
policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
# export the policy to ONNX, next to the loaded checkpoint
export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
export_policy_as_onnx(ppo_runner.alg.actor_critic, export_model_dir, filename="policy.onnx")
# get the initial observations
obs, _ = env.get_observations()
# simulate the environment
while simulation_app.is_running():
    # run everything in inference mode
    with torch.inference_mode():
        # agent stepping
        actions = policy(obs)
        # env stepping
        obs, _, _, _ = env.step(actions)
# close the simulator
env.close()
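If you want to sanity-check the exported file outside the simulator, a minimal sketch using onnxruntime could look like the following. Note the assumptions: export_model_dir comes from the snippet above, and num_obs is a placeholder you'd replace with your task's actual observation dimension.

import os

import numpy as np
import onnxruntime as ort

# directory created by export_policy_as_onnx in the snippet above
export_model_dir = "exported"

# num_obs is hypothetical -- set it to your task's observation size
num_obs = 48
dummy_obs = np.zeros((1, num_obs), dtype=np.float32)

# run the exported graph once; we read the input name from the model
# itself rather than assuming what the exporter called it
session = ort.InferenceSession(os.path.join(export_model_dir, "policy.onnx"))
input_name = session.get_inputs()[0].name
actions = session.run(None, {input_name: dummy_obs})[0]
print("actions:", actions)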
and you also have access to the details of the NN:
policy = RslRlPpoActorCriticCfg(
    init_noise_std=1.0,
    actor_hidden_dims=[32, 32],
    critic_hidden_dims=[32, 32],
    activation="elu",
)
algorithm = RslRlPpoAlgorithmCfg(
    value_loss_coef=1.0,
    use_clipped_value_loss=True,
    clip_param=0.2,
    entropy_coef=0.005,
    num_learning_epochs=5,
    num_mini_batches=4,
    learning_rate=1.0e-3,
    schedule="adaptive",
    gamma=0.99,
    lam=0.95,
    desired_kl=0.01,
    max_grad_norm=1.0,
)
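For reference, the actor described by that config is just a small MLP, so the exported ONNX graph is tiny. Here is a minimal PyTorch sketch of the network shape implied by actor_hidden_dims=[32, 32] and activation="elu"; num_obs and num_actions are hypothetical placeholders for your task's dimensions.

import torch
import torch.nn as nn

# hypothetical dimensions -- set to your task's observation/action sizes
num_obs, num_actions = 48, 12

# two hidden layers of 32 units with ELU, per actor_hidden_dims=[32, 32];
# the final layer outputs the action mean
actor = nn.Sequential(
    nn.Linear(num_obs, 32),
    nn.ELU(),
    nn.Linear(32, 32),
    nn.ELU(),
    nn.Linear(32, num_actions),
)

# init_noise_std=1.0 sets the initial std of the Gaussian the policy samples
# from during training; the inference policy uses the deterministic mean
obs = torch.zeros(1, num_obs)
with torch.inference_mode():
    action_mean = actor(obs)

The critic mirrors the same 32-32 ELU stack (critic_hidden_dims=[32, 32]) but ends in a single scalar value output; only the actor is what gets exported for deployment.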