Error converting custom Faster-RCNN model to tensorrt. (int object is not iterable)

Hi,
I have been trying to convert a customized Faster R-CNN network with a ResNet18-FPN backbone to TensorRT using the following code.

import torch
import torchvision
import torch2trt
import tensorrt as trt

# Build the detector, then move it to the GPU, switch it to inference
# mode and cast its weights to half precision.
model = get_model_instance('resnet18_fpn', num_classes)
model = model.cuda()
model = model.eval()
model = model.half()

class ModelWrapper(torch.nn.Module):
    """
    Adapter that makes a wrapped model return plain tensors.

    Models that return a dict (for example segmentation models exposing an
    ``'out'`` entry) keep returning ``output['out']``. torchvision detection
    models such as Faster R-CNN return, in eval mode, a list holding one
    dict per image with the keys ``'boxes'``, ``'labels'`` and ``'scores'``;
    indexing that list with ``'out'`` raises a TypeError, so this case is
    unpacked into a tuple of tensors instead.

    Arguments:
        model (nn.Module): the model to wrap.
    """
    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        self.model = model

    def forward(self, x):
        """
        Run the wrapped model on ``x`` and return tensors only.

        Returns:
            ``output['out']`` when the model returns a dict, otherwise the
            tuple ``(boxes, labels, scores)`` of the single image in the
            batch.
        Raises:
            ValueError: if a detection model returned results for a number
                of images other than one; per-image results cannot be
                merged into a single set of tensors here.
        """
        output = self.model(x)
        if isinstance(output, dict):
            return output['out']
        if len(output) != 1:
            raise ValueError(
                "expected detections for exactly one image, got {}".format(len(output)))
        detections = output[0]
        return detections['boxes'], detections['labels'], detections['scores']

# Wrap the detector and make sure the wrapper itself is in half precision.
model_w = ModelWrapper(model)
model_w = model_w.half()

# Dummy 1x3x720x1280 half-precision input on the GPU; torch2trt calls the
# module with these inputs while building the engine.
data = torch.ones((1, 3, 720, 1280))
data = data.cuda().half()

model_trt = torch2trt.torch2trt(model_w, [data], fp16_mode=True)

This code throws the following error:

Traceback (most recent call last):
  File "inference.py", line 113, in <module>
    main()
  File "inference.py", line 110, in main
    inference_video(args.dataset_path,args.model_name,args.checkpoint_path,args.score_threshold,args.video_path)
  File "inference.py", line 55, in inference_video
    model_trt = torch2trt(model_w, [data], fp16_mode=True)
  File "/usr/local/lib/python2.7/dist-packages/torch2trt/torch2trt.py", line 377, in torch2trt
    outputs = module(*inputs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "inference.py", line 51, in forward
    return self.model(x)['out']
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "build/bdist.linux-aarch64/egg/torchvision/models/detection/generalized_rcnn.py", line 46, in forward
  File "/usr/local/lib/python2.7/dist-packages/torch/tensor.py", line 411, in <lambda>
    return iter(imap(lambda i: self[i], range(self.size(0))))
  File "/usr/local/lib/python2.7/dist-packages/torch2trt/torch2trt.py", line 202, in wrapper
    converter['converter'](ctx)
  File "/usr/local/lib/python2.7/dist-packages/torch2trt/converters/getitem.py", line 34, in convert_tensor_getitem
    num_ellipsis = input.ndim - num_slice_types(slices)
  File "/usr/local/lib/python2.7/dist-packages/torch2trt/converters/getitem.py", line 18, in num_slice_types
    for s in slices:
TypeError: 'int' object is not iterable

Hi,

Could you please share the script and model file so we can help better?
Also, can you provide details on the platforms you are using:
o Linux distro and version
o GPU type
o Nvidia driver version
o CUDA version
o CUDNN version
o Python version [if using python]
o Tensorflow and PyTorch version
o TensorRT version

Meanwhile, you can try an alternative approach: convert the PyTorch model to ONNX first, and then convert the ONNX model to TensorRT.
https://github.com/onnx/onnx-tensorrt/blob/master/operators.md

Thanks

Hi Sunil,

Thank you for the reply.
Actually, I am converting it on Jetson Nano.

o Linux : Ubuntu 18.04.3 LTS
o CUDA version : 10.0
o Python version : 2.7
o PyTorch version : 1.3.0
o Torchvision version : 0.4.2
o TensorRT version : 6.0.1

You can download the model file from this link.

https://drive.google.com/file/d/15DpYQsNbgHXyQKlbD3vKqBHXDNvjS8Ta/view?usp=sharing

The script file is:

from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision
import torch.nn as nn
from collections import OrderedDict
import torch
import torch2trt
from torch import nn
import torchvision.models as models
from torch.jit.annotations import Dict
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.ops import misc as misc_nn_ops


class IntermediateLayerGetter(nn.Module):
    """
    Run the direct children of a model in order and collect chosen outputs.

    The children of ``model`` are kept, in registration order, up to the
    point where every name in ``return_layers`` has been seen. ``forward``
    applies the kept children one after another and records the activation
    of each child listed in ``return_layers``.

    Limitations:
        * The children must have been registered in the order in which
          they are used, and a module must **not** be reused twice in the
          original forward pass.
        * Only modules assigned directly to ``model`` can be selected:
          ``model.feature1`` works, ``model.feature1.layer2`` does not.

    Arguments:
        model (nn.Module): model whose intermediate activations are wanted
        return_layers (Dict[name, new_name]): maps the name of a direct
            child of ``model`` to the key under which its activation is
            stored in the returned dict.
    Raises:
        ValueError: if a key of ``return_layers`` is not a direct child
            of ``model``.
    Examples::
        >>> m = torchvision.models.resnet18(pretrained=True)
        >>> # pick layer1 and layer3 and call them `feat1` and `feat2`
        >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
        >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
        >>> out = new_m(torch.rand(1, 3, 224, 224))
        >>> print([(k, v.shape) for k, v in out.items()])
        >>>     [('feat1', torch.Size([1, 64, 56, 56])),
        >>>      ('feat2', torch.Size([1, 256, 14, 14]))]
    """
    _version = 2
    __constants__ = ['layers']
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        child_names = [child_name for child_name, _ in model.named_children()]
        if not set(return_layers).issubset(child_names):
            raise ValueError("return_layers are not present in model")
        super(IntermediateLayerGetter, self).__init__()

        # Work on a copy so the caller's mapping is stored untouched.
        pending = dict(return_layers.items())
        kept = OrderedDict()
        for child_name, child in model.named_children():
            kept[child_name] = child
            pending.pop(child_name, None)
            # Stop as soon as every requested layer has been collected.
            if not pending:
                break

        self.layers = nn.ModuleDict(kept)
        self.return_layers = return_layers

    def forward(self, x):
        """Apply the kept layers in sequence; return the selected outputs."""
        collected = OrderedDict()
        for layer_name, layer in self.layers.items():
            x = layer(x)
            if layer_name not in self.return_layers:
                continue
            collected[self.return_layers[layer_name]] = x
        return collected

    @torch.jit.ignore
    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        """
        Load parameters, accepting state dicts saved before version 2.

        Older state dicts lack the extra "layers." nesting level (added for
        torchscript support), so their keys are renamed in place before the
        standard loading logic runs.
        """
        saved_version = local_metadata.get('version', None)
        if saved_version is None or saved_version < 2:
            nesting = len("layers.")
            for current_key in self.state_dict().keys():
                # Same key without the leading "layers." component.
                legacy_key = prefix + current_key[nesting:]
                if legacy_key in state_dict:
                    state_dict[prefix + current_key] = state_dict.pop(legacy_key)
        super(IntermediateLayerGetter, self)._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs)




class BackboneWithFPN(nn.Sequential):
    """
    Sequential model made of a feature extractor followed by an FPN.

    The first stage, ``body``, is an IntermediateLayerGetter built from
    ``backbone`` and ``return_layers``; the second stage, ``fpn``, is a
    FeaturePyramidNetwork with a LastLevelMaxPool extra block. The
    limitations of IntermediateLayerGetter therefore apply here as well.

    Arguments:
        backbone (nn.Module)
        return_layers (Dict[name, new_name]): maps the names of the
            backbone modules whose activations are wanted to the names
            under which those activations are returned.
        in_channels_list (List[int]): number of channels of each returned
            feature map, in the order they appear in the OrderedDict
        out_channels (int): number of channels in the FPN.
    Attributes:
        out_channels (int): the number of channels in the FPN
    """
    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        stages = OrderedDict()
        stages["body"] = IntermediateLayerGetter(backbone, return_layers=return_layers)
        stages["fpn"] = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool(),
        )
        super(BackboneWithFPN, self).__init__(stages)
        # Stored as an attribute so code using this backbone can read it.
        self.out_channels = out_channels


def resnet_fpn_backbone(backbone_name, pretrained):
    """
    Build a torchvision ResNet wrapped with a feature pyramid network.

    The outputs of ``layer1`` .. ``layer4`` are fed to the FPN under the
    names 0 .. 3. No parameters are frozen and the constructor's default
    normalisation layer is used.

    Arguments:
        backbone_name (str): one of 'resnet18', 'resnet34', 'resnet50'
            or 'resnet101'.
        pretrained (bool): forwarded to the torchvision constructor as its
            ``pretrained`` argument.
    Returns:
        BackboneWithFPN: backbone whose FPN has 256 output channels.
    Raises:
        ValueError: if ``backbone_name`` is not one of the supported names.
    """
    supported = ('resnet18', 'resnet34', 'resnet50', 'resnet101')
    if backbone_name not in supported:
        raise ValueError(
            "unsupported backbone_name {!r}; expected one of {}".format(
                backbone_name, ", ".join(supported)))

    # The torchvision constructors carry the same names as the accepted
    # backbone names, so the lookup can go straight through the module.
    backbone = getattr(models, backbone_name)(pretrained=pretrained)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    # Channel counts of layer1..layer4, derived from the backbone's final
    # ``inplanes`` value; each stage has twice the channels of the previous.
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)

def get_model_instance(model_name='vgg16', n_classes=50):
    """
    Build a Faster R-CNN detector on top of the requested backbone.

    Arguments:
        model_name (str): one of 'resnet50_fpn', 'resnet18_fpn',
            'resnet34_fpn', 'resnet101_fpn', 'mobilenet', 'vgg16',
            'densenet', 'googlenet', 'alexnet', 'resnet18', 'resnet34',
            'resnet50', 'resnet101' or 'resnet152'.
        n_classes (int): value passed as ``num_classes`` to the detector
            (or to the replacement box predictor for 'resnet50_fpn').
    Returns:
        The assembled detection model.
    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    # FPN variants assembled with resnet_fpn_backbone: name -> ResNet name.
    fpn_backbones = {
        'resnet18_fpn': 'resnet18',
        'resnet34_fpn': 'resnet34',
        'resnet101_fpn': 'resnet101',
    }
    # Backbones taken from the ``.features`` attribute of the model:
    # name -> (torchvision constructor name, output channels).
    feature_backbones = {
        'mobilenet': ('mobilenet_v2', 1280),
        'vgg16': ('vgg16', 512),
        'densenet': ('densenet121', 1024),
        'alexnet': ('alexnet', 256),
    }
    # Backbones built from all children of the model except the last one:
    # name -> (torchvision constructor name, output channels).
    truncated_backbones = {
        'googlenet': ('googlenet', 1024),
        'resnet18': ('resnet18', 512),
        'resnet34': ('resnet34', 512),
        'resnet50': ('resnet50', 2048),
        'resnet101': ('resnet101', 2048),
        'resnet152': ('resnet152', 2048),
    }

    if model_name == 'resnet50_fpn':
        # Start from torchvision's pretrained detector and swap in a box
        # predictor sized for n_classes.
        model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(
            in_features, num_classes=n_classes)
        return model

    if model_name in fpn_backbones:
        backbone = resnet_fpn_backbone(fpn_backbones[model_name], True)
        return FasterRCNN(backbone, num_classes=n_classes)

    if model_name in feature_backbones:
        constructor_name, out_channels = feature_backbones[model_name]
        backbone = getattr(models, constructor_name)(pretrained=True).features
    elif model_name in truncated_backbones:
        constructor_name, out_channels = truncated_backbones[model_name]
        full_model = getattr(models, constructor_name)(pretrained=True)
        # Drop only the last child (the final fc layer).
        # NOTE(review): for the ResNets this presumably leaves the global
        # average pool in the backbone, and GoogLeNet's children may not
        # form a valid sequential chain -- confirm both are intended.
        backbone = nn.Sequential(*list(full_model.children())[:-1])
    else:
        supported = (['resnet50_fpn'] + sorted(fpn_backbones)
                     + sorted(feature_backbones) + sorted(truncated_backbones))
        raise ValueError(
            "unsupported model_name {!r}; expected one of {}".format(
                model_name, ", ".join(supported)))

    # FasterRCNN needs to know the number of output channels of the
    # backbone (for mobilenet_v2 it is 1280), so it is attached here.
    backbone.out_channels = out_channels

    # Make the RPN generate 5 x 3 anchors per spatial location, with 5
    # different sizes and 3 different aspect ratios. The arguments are
    # Tuple[Tuple[int]] because each feature map could potentially have
    # different sizes and aspect ratios.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # Feature maps used for region-of-interest cropping, and the size of
    # the crop after rescaling. If the backbone returns a Tensor,
    # featmap_names is expected to be [0]. More generally the backbone
    # should return an OrderedDict[Tensor], and featmap_names selects
    # which feature maps to use.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # Put the pieces together inside a FasterRCNN model.
    model = FasterRCNN(backbone,
                       num_classes=n_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model


# Build the 15-class ResNet-18 FPN detector, then move it to the GPU,
# switch it to inference mode and cast its weights to half precision.
model = get_model_instance('resnet18_fpn', 15)
model = model.cuda()
model = model.eval()
model = model.half()

class ModelWrapper(torch.nn.Module):
    """
    Adapter that makes a wrapped model return plain tensors.

    Models that return a dict (for example segmentation models exposing an
    ``'out'`` entry) keep returning ``output['out']``. torchvision detection
    models such as Faster R-CNN return, in eval mode, a list holding one
    dict per image with the keys ``'boxes'``, ``'labels'`` and ``'scores'``;
    indexing that list with ``'out'`` raises a TypeError, so this case is
    unpacked into a tuple of tensors instead.

    Arguments:
        model (nn.Module): the model to wrap.
    """
    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        self.model = model

    def forward(self, x):
        """
        Run the wrapped model on ``x`` and return tensors only.

        Returns:
            ``output['out']`` when the model returns a dict, otherwise the
            tuple ``(boxes, labels, scores)`` of the single image in the
            batch.
        Raises:
            ValueError: if a detection model returned results for a number
                of images other than one; per-image results cannot be
                merged into a single set of tensors here.
        """
        output = self.model(x)
        if isinstance(output, dict):
            return output['out']
        if len(output) != 1:
            raise ValueError(
                "expected detections for exactly one image, got {}".format(len(output)))
        detections = output[0]
        return detections['boxes'], detections['labels'], detections['scores']

# Wrap the detector and make sure the wrapper itself is in half precision.
model_w = ModelWrapper(model)
model_w = model_w.half()

# Dummy 1x3x720x1280 half-precision input on the GPU, passed to torch2trt
# as the example input for the conversion.
data = torch.ones((1, 3, 720, 1280))
data = data.cuda().half()

model_trt = torch2trt.torch2trt(model_w, [data], fp16_mode=True)

Hi,

This specific issue may arise because the ONNX parser is not currently compatible with ONNX models exported from PyTorch 1.3. If you downgrade to PyTorch 1.2, the issue should go away.

Alternatively, you can use TensorRT 7, which supports PyTorch 1.3.
Please refer below link for more details:
https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/tensorrt-7.html#tensorrt-7

Thanks