Creating an environment that uses CUDA

Hi, I am new to Isaac Gym. Basically, I want to set up an environment that itself trains a model, so the environment has to run on the GPU. It is a conventional RL task, not robotics. Is Isaac Gym the right tool?

Also, I intend to wrap the environment in a VecEnv wrapper so it can be used with Stable Baselines3 (there is a rough sketch of what I mean after the code below). If Isaac Gym is the right tool, can someone point me to a tutorial on how to create an environment?

I am trying to create an environment similar to the following, but with Isaac Gym:

from copy import deepcopy

import gym
import numpy as np
import setproctitle
import torch
import torch.nn.functional as F
from gym import spaces
from torch.utils.data import DataLoader

import utils  # project module providing load_dataset_and_network, AvgLoss, accuracy


class AdaptiveLearningRateOptimizer(gym.Env):
    """
    Optimization environment that implements the gym environment interface.
    Can be used to learn an adaptive learning rate schedule.

    Observations (7):
        0: Training loss
        1: Validation loss
        2: Variance of predictions
        3: Variance of prediction changes
        4: Mean of output weight matrix
        5: Variance of output weight matrix
        6: Learning rate

    Actions - Discrete (3):
        0: Increases the learning rate
        1: Decreases the learning rate
        2: No-op

    Actions - Continuous (1):
        0: Scaling factor for the learning rate
    """
    def __init__(self, dataset, architecture, batch_size, update_freq, num_train_steps, initial_lr, discrete=True, action_range=1.05, lr_noise=True):
        super().__init__()
        data, net_fn = utils.load_dataset_and_network(dataset, architecture)

        class SpecDummy():
            def __init__(self, id):
                self.id = id
         
        self.spec = SpecDummy(id='AdaptiveLearningRate-v0' if discrete else 'AdaptiveLearningRateContinuous-v0')
        self.dataset = dataset
        self.architecture = architecture
        self.train_dataset = data[0]
        self.val_dataset = data[1]
        self.test_dataset = data[2]
        self.net_fn = net_fn
        self.batch_size = batch_size
        self.update_freq = update_freq
        self.num_train_steps = num_train_steps
        self.initial_lr = initial_lr
        self.ep_initial_lr = initial_lr
        self.discrete = discrete
        self.action_range = action_range
        self.last_network_predictions = None
        self.latest_end_val = None

        if discrete:
            self.action_space = spaces.Discrete(3)
        else:
            self.action_space = spaces.Box(
                low=1/self.action_range,
                high=self.action_range,
                shape=(1,),
                dtype=np.float32
            )

        self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(7,),
                dtype=np.float32
            )

        self.lr_noise = lr_noise
        self.info_list = []
        self.cuda = torch.cuda.is_available()
        self.displayed_load_error = False


    def _clip_lr(self):
        """
        Clips the learning rate to the [1e-5, 1e-1] range.
        """
        self.lr = float(np.clip(self.lr, 1e-5, 1e-1))


    def _add_lr_noise(self, std=None, clip=True):
        """
        Adds Gaussian noise to the learning rate.
        `std` denotes the standard deviation. Optionally clips the learning rate.
        """
        if std is None: std = 1e-5
        self.lr += float(torch.empty(1).normal_(mean=0, std=std))
        if clip: self._clip_lr()

    
    def _update_lr(self, action, clip=True):
        """
        Updates the current learning rate according to a given action.
        Functionality depends on whether environment is discrete or continuous.
        Optionally clips the learning rate.
        """
        if self.discrete:
            if action == 0:
                self.lr *= self.action_range
            elif action == 1:
                self.lr /= self.action_range
        else:
            self.lr *= float(action)
        
        if self.training_steps != 0:
            if self.lr_noise:
                self._add_lr_noise(clip=clip)
            self.schedule.step()
    

    def test(self):
        """
        Computes loss and accuracy on a test set for the currently stored network.
        """
        test_generator = DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
        test_loss, test_acc = utils.AvgLoss(), utils.AvgLoss()

        for x, y in test_generator:
            with torch.no_grad():
                if self.cuda:
                    x = x.cuda()
                    y = y.cuda()
                yhat = self.net(x)
                test_loss += F.cross_entropy(yhat, y)
                test_acc += utils.accuracy(yhat, y)

        return test_loss.avg, test_acc.avg


    def step(self, action):
        """
        Takes a step in the environment and computes a new state.
        """
        self._update_lr(action)
        train_loss = utils.AvgLoss()
        val_loss = utils.AvgLoss()

        for _ in range(self.update_freq):
            if self.training_steps % self.num_train_batches == 0:
                self.train_iter = iter(self.train_generator)

            x, y = next(self.train_iter)
            if self.cuda:
                x = x.cuda()
                y = y.cuda()
            loss = F.cross_entropy(self.net(x), y)
            train_loss += loss
            self.training_steps += 1

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        yhat_var = utils.AvgLoss()
        network_predictions = []

        for x, y in self.val_generator:
            with torch.no_grad():
                if self.cuda:
                    x = x.cuda()
                    y = y.cuda()
                yhat = self.net(x)
                val_loss += F.cross_entropy(yhat, y)
                network_predictions.append(yhat)
                yhat_var += yhat.var()

        output_layer_weights = list(self.net.parameters())[-2]
        assert output_layer_weights.size(0) == 10  # expects a 10-class output layer

        network_prediction_change_var = []
        for i, pred in enumerate(network_predictions):
            try:
                last_pred = self.last_network_predictions[i]
            except (TypeError, IndexError):  # first step: no stored predictions yet
                last_pred = 0
            network_prediction_change_var.append((pred - last_pred).var().cpu())
        network_prediction_change_var = np.array(network_prediction_change_var).mean()

        state = np.array([
            train_loss.avg,
            val_loss.avg,
            yhat_var.avg,
            network_prediction_change_var,
            output_layer_weights.mean().cpu().data,
            output_layer_weights.var().cpu().data,
            self.lr
        ], dtype=np.float32)
        reward = -val_loss.avg
        done = self.training_steps > self.num_train_steps
        info = {
            'train_loss': train_loss.avg,
            'val_loss': val_loss.avg,
            'lr': self.lr
        }
        self.info_list.append(info)
        self.last_network_predictions = deepcopy(network_predictions)

        if done:
            self.latest_end_val = float(val_loss.avg)

        return state, reward, done, info


    def reset(self, take_first_step=True):
        """
        Resets the environment and returns the initial state.
        """
        setproctitle.setproctitle('PPO2-ALRS-v0')
        self.train_generator = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
        self.val_generator = DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=True)
        self.num_train_batches = len(self.train_generator)
        self.training_steps = 0
        self.last_network_predictions = None
        self.info_list = []
        self.net = self.net_fn()
        
        if self.cuda:
            self.net.cuda()

        if self.initial_lr is None:
            self.ep_initial_lr = float(np.random.choice([1e-2, 1e-3, 1e-4]))
        else:
            self.ep_initial_lr = self.initial_lr

        if self.lr_noise:
            self.ep_initial_lr += float(torch.empty(1).normal_(mean=0, std=self.ep_initial_lr/10))
            self.ep_initial_lr = float(np.clip(self.ep_initial_lr, 1e-5, 1e-1))

        self.lr = self.ep_initial_lr
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.ep_initial_lr)   
        self.lambda_func = lambda _: self.lr/self.ep_initial_lr
        self.schedule = torch.optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lambda_func)

        if take_first_step:
            state, _, _, _ = self.step(action=2 if self.discrete else 1)
            
            return state
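
For context, this is roughly how I was planning to hook the environment up to Stable Baselines3, by wrapping the single env in a DummyVecEnv (assuming the old gym-style reset/step API used above). The dataset/architecture arguments are just placeholders for whatever utils.load_dataset_and_network expects in my setup, and I assume a properly GPU-batched Isaac Gym environment would need a custom VecEnv instead:

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv


def make_env():
    # Placeholder arguments; the real values depend on the dataset/network setup.
    return AdaptiveLearningRateOptimizer(
        dataset='mnist',
        architecture='mlp',
        batch_size=64,
        update_freq=10,
        num_train_steps=1000,
        initial_lr=1e-3,
        discrete=True,
    )


# DummyVecEnv steps a single env copy in the host process; a GPU-batched
# Isaac Gym environment would instead expose all of its envs through one VecEnv.
vec_env = DummyVecEnv([make_env])
model = PPO('MlpPolicy', vec_env, verbose=1)
model.learn(total_timesteps=10_000)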