Hi Sunil,
Thank you for the reply.
Actually, I am converting the model on a Jetson Nano.
o Linux : Ubuntu 18.04.3 LTS
o CUDA version : 10.0
o Python version : 2.7
o PyTorch version : 1.3.0
o Torchvision version : 0.4.2
o TensorRT version : 6.0.1
You can download the model file from this link.
https://drive.google.com/file/d/15DpYQsNbgHXyQKlbD3vKqBHXDNvjS8Ta/view?usp=sharing
The script file is:
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision
import torch.nn as nn
from collections import OrderedDict
import torch
import torch2trt
from torch import nn
import torchvision.models as models
from torch.jit.annotations import Dict
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.ops import misc as misc_nn_ops
class IntermediateLayerGetter(nn.Module):
    """
    Module wrapper that returns intermediate layers from a model.

    It has a strong assumption that the modules have been registered
    into the model in the same order as they are used.
    This means that one should **not** reuse the same nn.Module
    twice in the forward if you want this to work.
    Additionally, it is only able to query submodules that are directly
    assigned to the model. So if `model` is passed, `model.feature1` can
    be returned, but not `model.feature1.layer2`.

    Arguments:
        model (nn.Module): model on which we will extract the features
        return_layers (Dict[name, new_name]): a dict containing the names
            of the modules for which the activations will be returned as
            the key of the dict, and the value of the dict is the name
            of the returned activation (which the user can specify).

    Examples::

        >>> m = torchvision.models.resnet18(pretrained=True)
        >>> # extract layer1 and layer3, giving as names `feat1` and `feat2`
        >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
        >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
        >>> out = new_m(torch.rand(1, 3, 224, 224))
        >>> print([(k, v.shape) for k, v in out.items()])
        >>> [('feat1', torch.Size([1, 64, 56, 56])),
        >>>  ('feat2', torch.Size([1, 256, 14, 14]))]
    """
    # State-dict layout version; bumped to 2 when children were nested under
    # the "layers." prefix (see _load_from_state_dict below).
    _version = 2
    __constants__ = ['layers']
    __annotations__ = {
        "return_layers": Dict[str, str],
    }

    def __init__(self, model, return_layers):
        # Fail fast: every requested layer must be a *direct* child of model.
        if not set(return_layers).issubset([name for name, _ in model.named_children()]):
            raise ValueError("return_layers are not present in model")
        super(IntermediateLayerGetter, self).__init__()
        orig_return_layers = return_layers
        # Shallow copy so the caller's dict is not mutated by the deletions below.
        return_layers = {k: v for k, v in return_layers.items()}
        layers = OrderedDict()
        # Keep children in registration order, stopping after the last
        # requested layer -- anything later is never needed in forward().
        for name, module in model.named_children():
            layers[name] = module
            if name in return_layers:
                del return_layers[name]
            if not return_layers:
                break
        self.layers = nn.ModuleDict(layers)
        self.return_layers = orig_return_layers

    def forward(self, x):
        # Run x through the retained children sequentially, capturing the
        # activation of each requested layer under its user-chosen name.
        out = OrderedDict()
        for name, module in self.layers.items():
            x = module(x)
            if name in self.return_layers:
                out_name = self.return_layers[name]
                out[out_name] = x
        return out

    @torch.jit.ignore
    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Migrate checkpoints saved before _version 2: old keys lack the
        # "layers." nesting, so rename them in-place before delegating to the
        # normal nn.Module loading logic.
        version = local_metadata.get('version', None)
        if (version is None or version < 2):
            # now we have a new nesting level for torchscript support
            for new_key in self.state_dict().keys():
                # remove prefix "layers."
                old_key = new_key[len("layers."):]
                old_key = prefix + old_key
                new_key = prefix + new_key
                if old_key in state_dict:
                    value = state_dict[old_key]
                    del state_dict[old_key]
                    state_dict[new_key] = value
        super(IntermediateLayerGetter, self)._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs)
class BackboneWithFPN(nn.Sequential):
    """
    Sequential container that adds a Feature Pyramid Network on top of a
    backbone.

    Internally an IntermediateLayerGetter is used to extract the feature maps
    named in ``return_layers``; its limitations (direct children only, no
    module reuse) apply here as well.

    Arguments:
        backbone (nn.Module): the feature-extracting network.
        return_layers (Dict[name, new_name]): maps backbone child names to
            the names under which their activations are exposed to the FPN.
        in_channels_list (List[int]): channel count of each returned feature
            map, in the order they appear in the OrderedDict.
        out_channels (int): number of channels produced by the FPN.

    Attributes:
        out_channels (int): the number of channels in the FPN.
    """

    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        feature_extractor = IntermediateLayerGetter(backbone,
                                                    return_layers=return_layers)
        pyramid = FeaturePyramidNetwork(
            extra_blocks=LastLevelMaxPool(),
            in_channels_list=in_channels_list,
            out_channels=out_channels,
        )
        # Explicit (name, module) pairs keep ordering on Python 2.7, where
        # keyword-argument order is not preserved.
        stages = OrderedDict([("body", feature_extractor), ("fpn", pyramid)])
        super(BackboneWithFPN, self).__init__(stages)
        self.out_channels = out_channels
def resnet_fpn_backbone(backbone_name, pretrained):
    """
    Build a ResNet backbone topped with a Feature Pyramid Network.

    Arguments:
        backbone_name (str): one of 'resnet18', 'resnet34', 'resnet50',
            'resnet101'.
        pretrained (bool): load ImageNet-pretrained weights.  Bug fix: the
            original ignored this flag and always loaded pretrained weights.

    Returns:
        BackboneWithFPN exposing the four residual-stage feature maps
        (named 0..3) plus the FPN's extra max-pool level.

    Raises:
        ValueError: for an unsupported ``backbone_name`` (the original fell
            through to a confusing NameError instead).
    """
    constructors = {
        'resnet18': models.resnet18,
        'resnet34': models.resnet34,
        'resnet50': models.resnet50,
        'resnet101': models.resnet101,
    }
    if backbone_name not in constructors:
        raise ValueError("Unsupported backbone_name: {!r}".format(backbone_name))
    backbone = constructors[backbone_name](pretrained=pretrained)

    # Tap the outputs of the four residual stages; the FPN sees them as 0..3.
    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
    # Channels after layer1 equal inplanes/8 (64 for resnet18/34, 256 for
    # resnet50/101); each subsequent stage doubles the channel count.
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
def get_model_instance(model_name='vgg16', n_classes=50):
    """
    Build a FasterRCNN detection model for ``n_classes`` classes.

    Arguments:
        model_name (str): one of 'resnet50_fpn', 'resnet18_fpn',
            'resnet34_fpn', 'resnet101_fpn', or a plain (no-FPN) backbone:
            'mobilenet', 'vgg16', 'densenet', 'googlenet', 'alexnet',
            'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'.
        n_classes (int): number of detection classes.

    Returns:
        A torchvision FasterRCNN model.

    Raises:
        ValueError: if ``model_name`` is not recognized (the original fell
            through to a NameError on an undefined ``backbone``).
    """
    # --- models that ship with their own FPN / anchors / pooler -----------
    if model_name == 'resnet50_fpn':
        # Pretrained detector; only the box-predictor head is replaced.
        model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes=n_classes)
        return model
    if model_name in ('resnet18_fpn', 'resnet34_fpn', 'resnet101_fpn'):
        # Custom ResNet + FPN backbone with the default RPN/RoI heads.
        backbone = resnet_fpn_backbone(model_name[:-len('_fpn')], True)
        return FasterRCNN(backbone, num_classes=n_classes)

    # --- plain (single feature map, no FPN) backbones ----------------------
    backbone = _build_plain_backbone(model_name)

    # One feature map: 5 sizes x 3 aspect ratios of anchors per spatial
    # location; tuple-of-tuples because each feature map could in principle
    # use different sizes/ratios.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    # The backbone returns a single Tensor, which torchvision 0.4 wraps in an
    # OrderedDict under the key 0 -- hence featmap_names=[0].
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    return FasterRCNN(backbone,
                      num_classes=n_classes,
                      rpn_anchor_generator=anchor_generator,
                      box_roi_pool=roi_pooler)


def _build_plain_backbone(model_name):
    """Return an ImageNet-pretrained feature extractor with ``out_channels`` set.

    Raises ValueError for an unknown ``model_name``.
    """
    # Classifier networks whose ``.features`` attribute is already a pure
    # convolutional feature extractor: (constructor, output channels).
    features_attr = {
        'mobilenet': (models.mobilenet_v2, 1280),
        'vgg16': (models.vgg16, 512),
        'densenet': (models.densenet121, 1024),
        'alexnet': (models.alexnet, 256),
    }
    # Networks without a ``.features`` attribute: drop the final fc layer
    # and keep everything else.
    drop_fc = {
        'googlenet': (models.googlenet, 1024),
        'resnet18': (models.resnet18, 512),
        'resnet34': (models.resnet34, 512),
        'resnet50': (models.resnet50, 2048),
        'resnet101': (models.resnet101, 2048),
        'resnet152': (models.resnet152, 2048),
    }
    if model_name in features_attr:
        ctor, out_channels = features_attr[model_name]
        backbone = ctor(pretrained=True).features
    elif model_name in drop_fc:
        ctor, out_channels = drop_fc[model_name]
        full_model = ctor(pretrained=True)
        backbone = nn.Sequential(*list(full_model.children())[:-1])
    else:
        # 'inception' was commented out in the original and is intentionally
        # not supported here.
        raise ValueError("Unsupported model_name: {!r}".format(model_name))
    # FasterRCNN needs to know how many channels the backbone outputs.
    backbone.out_channels = out_channels
    return backbone
# Build the FasterRCNN (ResNet-18 + FPN backbone, 15 classes) and move it to
# the GPU in eval mode with FP16 weights to match the FP16 TensorRT export.
model = get_model_instance('resnet18_fpn',15)
model = model.cuda().eval().half()
class ModelWrapper(torch.nn.Module):
    """Thin wrapper exposing a single nn.Module entry point for torch2trt.

    NOTE(review): the original ``forward`` returned ``self.model(x)['out']``,
    which is the torchvision *segmentation* convention (e.g. fcn_resnet
    returns a dict with an 'out' key).  Detection models such as FasterRCNN
    return a list of per-image dicts (boxes/labels/scores) in eval mode, so
    indexing with ``'out'`` raises a TypeError at runtime.  The wrapper now
    passes the wrapped model's output through unchanged.
    """

    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        # Registered as a submodule so .half()/.cuda() on the wrapper
        # propagate to the wrapped model.
        self.model = model

    def forward(self, x):
        return self.model(x)
# Wrap the detector and cast to FP16 to match the fp16_mode export below.
model_w = ModelWrapper(model).half()
# Dummy input fixing the network input resolution (1x3x720x1280) for TensorRT.
data = torch.ones((1, 3, 720, 1280)).cuda().half()
# NOTE(review): torch2trt traces the wrapped module; FasterRCNN's dynamic,
# list-of-dicts output is generally not supported by torch2trt's tracing --
# verify this conversion path on the target TensorRT version.
model_trt = torch2trt.torch2trt(model_w, [data], fp16_mode=True)