CUDA error: device-side assert triggered

I am using Train SDK 4.0 to train on the DecathlonDataset Task01_BrainTumour data, but the following error appears. Can anyone help me?

My MMAR JSON:

{
  "epochs": 100,
  "num_interval_per_valid": 1,
  "multi_gpu": false,
  "amp": true,
  "learning_rate": 1e-4,
  "tf32": true,
  "cudnn_benchmark": true,
  "dont_load_ckpt_model": true,
  "determinism": {
    "random_seed": 0
  },
  "train": {
    "loss": {
      "name": "DiceLoss",
      "args":{
        "to_onehot_y": false,
        "squared_pred": true,
        "sigmoid": true
      }
    },
    "optimizer": {
      "name": "Adam",
      "args": {
        "lr": "{learning_rate}",
        "weight_decay":1e-5, 
        "amsgrad": true
      }
    },
    "lr_scheduler": {
      "name": "StepLR",
      "args": {
        "step_size": 20,
        "gamma": 0.1
      }
    },
    "model": {
      "name": "UNet",
      "args": {
        "dimensions": 3,
        "in_channels": 1,
        "out_channels": 3,
        "channels": [16, 32, 64, 128, 256],
        "strides": [2, 2, 2, 2],
        "num_res_units": 2
      }
    },
    "pre_transforms": [
      {
        "name": "LoadImaged",
        "args": {
          "keys": ["image", "label"]
        }
      },
      {
        "name": "EnsureChannelFirstd",
        "args":{
          "keys": "image"
        }
      },
      {
        "path": "myTransformation.ConvertToMultiChannelBasedOnBratsClassesd",
        "args":{
          "keys": "label"
        }
      },
      {
        "name": "Spacingd",
        "args": {
            "keys": ["image", "label"],
            "pixdim": [1.5, 1.5, 2.0],
            "mode":["bilinear", "nearest"]
        }
      },
      {
        "name": "Orientationd",
        "args": {
            "keys": ["image", "label"],
            "axcodes":"RAS"
        }
      },
      {
        "name": "RandSpatialCropd",
        "args": {
            "keys": ["image", "label"],
            "roi_size":[128, 128, 64],
            "random_size":false
        }
      },
      {
        "name": "RandFlipd",
        "args": {
            "keys": ["image", "label"],
            "prob": 0.5,
            "spatial_axis":0
        }
      },
      {
        "name": "NormalizeIntensityd",
        "args": {
            "keys": "image",
            "nonzero": true,
            "channel_wise":true
        }
      },
      {
        "name": "RandScaleIntensityd",
        "args": {
            "keys": "image",
            "factors": 0.1,
            "prob":0.5
        }
      },
      {
        "name": "RandShiftIntensityd",
        "args": {
            "keys": "image",
            "offsets": 0.1,
            "prob":0.5
        }
      },
      {
        "name": "ToTensord",
        "args": {
          "keys": ["image", "label"]
        }
      }
    ],
    "dataset": {
      "name": "CacheDataset",
      "data_list_file_path": "{DATASET_JSON}",
      "data_file_base_dir": "{DATA_ROOT}",
      "data_list_key": "training",
      "args": {
        "cache_num": 4,
        "cache_rate": 1.0,
        "num_workers": 2
      }
    },
    "dataloader": {
      "name": "DataLoader",
      "args": {
        "batch_size": 2,
        "shuffle": true,
        "num_workers": 4
      }
    },
    "inferer": {
      "name": "SimpleInferer"
    },
    "handlers": [
      {
        "name": "CheckpointLoader",
        "disabled": "{dont_load_ckpt_model}",
        "args": {
          "load_path": "{MMAR_CKPT}",
          "load_dict": ["model"]
        }
      },
      {
        "name": "LrScheduleHandler",
        "args": {
          "print_lr": true
        }
      },
      {
        "name": "ValidationHandler",
        "args": {
          "interval": "{num_interval_per_valid}",
          "epoch_level": true
        }
      },
      {
        "name": "CheckpointSaver",
        "rank": 0,
        "args": {
          "save_dir": "{MMAR_CKPT_DIR}",
          "save_dict": ["model", "optimizer", "lr_scheduler"],
          "save_final": true,
          "save_interval": 5
        }
      },
      {
        "name": "StatsHandler",
        "rank": 0,
        "args": {
          "tag_name": "train_loss",
          "output_transform": "lambda x: x['loss']"
        }
      },
      {
        "name": "TensorBoardStatsHandler",
        "rank": 0,
        "args": {
          "log_dir": "{MMAR_CKPT_DIR}",
          "tag_name": "train_loss",
          "output_transform": "lambda x: x['loss']"
        }
      }
    ],
    "post_transforms": [
      {
        "name": "Activationsd",
        "args": {
          "keys": "pred",
          "softmax": true
        }
      },
      {
        "name": "AsDiscreted",
        "args": {
          "keys": ["pred", "label"],
          "argmax": [true, false],
          "to_onehot": true,
          "n_classes": 2
        }
      }
    ],
    "metrics": [
      {
        "name": "Accuracy",
        "log_label": "train_acc",
        "is_key_metric": true,
        "args": {
          "output_transform": "lambda x: (x['pred'], x['label'])"
        }
      }
    ],
    "trainer": {
      "name": "SupervisedTrainer",
      "args": {
        "max_epochs": "{epochs}"
      }
    }
  },
  "validate": {
    "pre_transforms": [
      {
        "ref": "LoadImaged"
      },
      {
        "ref": "EnsureChannelFirstd"
      },
      {
        "ref": "myTransformation.ConvertToMultiChannelBasedOnBratsClassesd"
      },
      {
        "ref": "Spacingd"
      },
      {
        "name": "CenterSpatialCropd",
        "args": {
            "keys": ["image","label"],
            "roi_size": [128, 128, 64]
        }
      },
      {
        "ref": "NormalizeIntensityd"
      },
      {
        "ref": "ToTensord"
      }
    ],
    "dataset": {
      "name": "CacheDataset",
      "data_list_file_path": "{DATASET_JSON}",
      "data_file_base_dir": "{DATA_ROOT}",
      "data_list_key": "validation",
      "args": {
        "cache_num": 4,
        "cache_rate": 1.0,
        "num_workers": 2
      }
    },
    "dataloader": {
      "name": "DataLoader",
      "args": {
        "batch_size": 1,
        "shuffle": false,
        "num_workers": 4
      }
    },
    "inferer": {
      "name": "SlidingWindowInferer",
      "args": {
        "roi_size": [160, 160, 160],
        "sw_batch_size": 4,
        "overlap": 0.5
      }
    },
    "handlers": [
      {
        "name": "StatsHandler",
        "rank": 0,
        "args": {
          "output_transform": "lambda x: None"
        }
      },
      {
        "name": "TensorBoardStatsHandler",
        "rank": 0,
        "args": {
          "log_dir": "{MMAR_CKPT_DIR}",
          "output_transform": "lambda x: None"
        }
      },
      {
        "name": "CheckpointSaver",
        "rank": 0,
        "args": {
          "save_dir": "{MMAR_CKPT_DIR}",
          "save_dict": ["model","train_conf"],
          "save_key_metric": true
        }
      }
    ],
    "post_transforms": [
      {
        "ref": "Activationsd"
      },
      {
        "ref": "AsDiscreted"
      }
    ],
    "metrics": [
      {
        "name": "MeanDice",
        "log_label": "val_mean_dice",
        "is_key_metric": true,
        "args": {
          "include_background": false,
          "output_transform": "lambda x: (x['pred'], x['label'])"
        }
      },
      {
        "name": "Accuracy",
        "log_label": "val_acc",
        "args": {
          "output_transform": "lambda x: (x['pred'], x['label'])"
        }
      }
    ],
    "evaluator": {
      "name": "SupervisedEvaluator"
    }
  }
}

My myTransformation.py:

import numpy as np
from monai.transforms import (
    MapTransform,
)

class ConvertToMultiChannelBasedOnBratsClassesd(MapTransform):
    """
    Turn a BraTS label map into a three-channel binary mask.

    Source label values:
        1 - peritumoral edema
        2 - GD-enhancing tumor
        3 - necrotic and non-enhancing tumor core

    Output channels, stacked along a new leading axis in this order:
        0 - TC (Tumor core): label 2 or label 3
        1 - WT (Whole tumor): label 1, 2 or 3
        2 - ET (Enhancing tumor): label 2

    The result is stored back under the same key as float32.
    """

    def __call__(self, data):
        # Work on a shallow copy so the caller's mapping is not rebound.
        converted = dict(data)
        for name in self.keys:
            labels = converted[name]
            # TC: union of label 2 and label 3.
            tumor_core = np.logical_or(labels == 2, labels == 3)
            # WT: TC extended with label 1.
            whole_tumor = np.logical_or(tumor_core, labels == 1)
            # ET: label 2 on its own.
            enhancing = labels == 2
            channels = [tumor_core, whole_tumor, enhancing]
            converted[name] = np.stack(channels, axis=0).astype(np.float32)
        return converted

Error log:

2021-06-18 07:26:50,263 - ignite.engine.engine.SupervisedTrainer - INFO - Engine run resuming from iteration 0, epoch 0 until 100 epochs
input data information of the runtime error transform:
2021-06-18 07:26:59,978 - DataStats - INFO - input data information of the runtime error transform:
/tmp/pip-req-build-_tx3iysr/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:310: operator(): block: [64,0,0], thread: [33,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
.....
/tmp/pip-req-build-_tx3iysr/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:310: operator(): block: [71,0,0], thread: [62,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
2021-06-18 07:27:00,108 - ignite.engine.engine.SupervisedTrainer - ERROR - Current run is terminating due to exception: applying transform <monai.transforms.compose.Compose object at 0x7fa0fffa3b20>
2021-06-18 07:27:08,421 - ignite.engine.engine.SupervisedTrainer - ERROR - Engine run is terminating due to exception: CUDA error: device-side assert triggered
2021-06-18 07:27:08,422 - ignite.engine.engine.SupervisedTrainer - INFO - Deleted previous saved final checkpoint: checkpoint_final_iteration=1.pt
Traceback (most recent call last):
  File "/opt/monai/monai/transforms/transform.py", line 48, in apply_transform
    return transform(data)
  File "/opt/monai/monai/transforms/post/dictionary.py", line 149, in __call__
    d[key] = self.converter(
  File "/opt/monai/monai/transforms/post/array.py", line 174, in __call__
    img = one_hot(img, _nclasses)
  File "/opt/monai/monai/networks/utils.py", line 57, in one_hot
    raise AssertionError("labels should have a channel with length equal to one.")
AssertionError: labels should have a channel with length equal to one.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/monai/monai/transforms/transform.py", line 48, in apply_transform
    return transform(data)
  File "/opt/monai/monai/transforms/compose.py", line 144, in __call__
    input_ = apply_transform(_transform, input_)
  File "/opt/monai/monai/transforms/transform.py", line 68, in apply_transform
    _log_stats(data=v, prefix=k)
  File "/opt/monai/monai/transforms/transform.py", line 61, in _log_stats
    datastats(img=data, data_shape=True, value_range=True, prefix=prefix)  # type: ignore
  File "/opt/monai/monai/transforms/utility/array.py", line 457, in __call__
    lines.append(f"Value range: ({torch.min(img)}, {torch.max(img)})")
  File "/opt/conda/lib/python3.8/site-packages/torch/tensor.py", line 524, in __format__
    return self.item().__format__(format_spec)
RuntimeError: CUDA error: device-side assert triggered

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 833, in _run_once_on_dataset
    self.state.output = self._process_function(self, self.state.batch)
  File "/opt/monai/monai/engines/trainer.py", line 182, in _iteration
    engine.fire_event(IterationEvents.MODEL_COMPLETED)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 448, in fire_event
    return self._fire_event(event_name)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
    func(*first, *(event_args + others), **kwargs)
  File "/opt/monai/monai/engines/workflow.py", line 165, in run_post_transform
    engine.state.output = apply_transform(posttrans, engine.state.output)
  File "/opt/monai/monai/transforms/transform.py", line 71, in apply_transform
    raise RuntimeError(f"applying transform {transform}") from e
RuntimeError: applying transform <monai.transforms.compose.Compose object at 0x7fa0fffa3b20>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 745, in _internal_run
    time_taken = self._run_once_on_dataset()
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 850, in _run_once_on_dataset
    self._handle_exception(e)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 467, in _handle_exception
    self._fire_event(Events.EXCEPTION_RAISED, e)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
    func(*first, *(event_args + others), **kwargs)
  File "/opt/monai/monai/handlers/checkpoint_saver.py", line 269, in exception_raised
    self._final_checkpoint(engine)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 430, in __call__
    self.save_handler(checkpoint, filename, metadata)
  File "/opt/monai/monai/handlers/checkpoint_saver.py", line 126, in __call__
    super().__call__(checkpoint=checkpoint, filename=filename, metadata=metadata)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 683, in __call__
    self._save_native(checkpoint, path)
  File "/opt/conda/lib/python3.8/site-packages/ignite/distributed/utils.py", line 569, in wrapper
    ret = func(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 687, in _save_native
    self._save_func(checkpoint, path, torch.save)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 697, in _save_func
    func(checkpoint, path, **self.kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 372, in save
    _save(obj, opened_zipfile, pickle_module, pickle_protocol)
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 488, in _save
    storage = storage.cpu()
  File "/opt/conda/lib/python3.8/site-packages/torch/storage.py", line 72, in cpu
    return self.type(getattr(torch, self.__class__.__name__))
  File "/opt/conda/lib/python3.8/site-packages/torch/cuda/__init__.py", line 492, in type
    return super(_CudaBase, self).type(*args, **kwargs)  # type: ignore[misc]
  File "/opt/conda/lib/python3.8/site-packages/torch/_utils.py", line 46, in _type
    return dtype(self.size()).copy_(self, non_blocking)
RuntimeError: CUDA error: device-side assert triggered

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "apps/train.py", line 35, in <module>
  File "apps/train.py", line 27, in main
  File "apps/mmar_conf.py", line 31, in train_mmar
  File "/opt/monai/monai/engines/trainer.py", line 49, in run
    super().run()
  File "/opt/monai/monai/engines/workflow.py", line 206, in run
    super().run(data=self.data_loader, max_epochs=self.state.max_epochs)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 702, in run
    return self._internal_run()
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 775, in _internal_run
    self._handle_exception(e)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 467, in _handle_exception
    self._fire_event(Events.EXCEPTION_RAISED, e)
  File "/opt/conda/lib/python3.8/site-packages/ignite/engine/engine.py", line 424, in _fire_event
    func(*first, *(event_args + others), **kwargs)
  File "/opt/monai/monai/handlers/checkpoint_saver.py", line 269, in exception_raised
    self._final_checkpoint(engine)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 430, in __call__
    self.save_handler(checkpoint, filename, metadata)
  File "/opt/monai/monai/handlers/checkpoint_saver.py", line 126, in __call__
    super().__call__(checkpoint=checkpoint, filename=filename, metadata=metadata)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 683, in __call__
    self._save_native(checkpoint, path)
  File "/opt/conda/lib/python3.8/site-packages/ignite/distributed/utils.py", line 569, in wrapper
    ret = func(*args, **kwargs)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 687, in _save_native
    self._save_func(checkpoint, path, torch.save)
  File "/opt/conda/lib/python3.8/site-packages/ignite/handlers/checkpoint.py", line 697, in _save_func
    func(checkpoint, path, **self.kwargs)
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 372, in save
    _save(obj, opened_zipfile, pickle_module, pickle_protocol)
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 488, in _save
    storage = storage.cpu()
  File "/opt/conda/lib/python3.8/site-packages/torch/storage.py", line 72, in cpu
    return self.type(getattr(torch, self.__class__.__name__))
  File "/opt/conda/lib/python3.8/site-packages/torch/cuda/__init__.py", line 492, in type
    return super(_CudaBase, self).type(*args, **kwargs)  # type: ignore[misc]
  File "/opt/conda/lib/python3.8/site-packages/torch/_utils.py", line 46, in _type
    return dtype(self.size()).copy_(self, non_blocking)
RuntimeError: CUDA error: device-side assert triggered

Does this mean that the number of my labels is not consistent with the actual number?

Hi
I am not familiar with this dataset or your custom loader, but I noticed this error:

File "/opt/monai/monai/networks/utils.py", line 57, in one_hot
    raise AssertionError("labels should have a channel with length equal to one.")
AssertionError: labels should have a channel with length equal to one.

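It seems that either the network and/or the loss is expecting the label to be single-channel so that it can convert it to one-hot (n channels). I think you are already doing that conversion yourself, so the loss and/or network should not do it again. Note that in the traceback above the `one_hot` call is reached from the post transforms (`monai/transforms/post/dictionary.py`), so the `AsDiscreted` entry with `"to_onehot": true` in `post_transforms` is the place to check.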