Hi,
First of all, I did not allocate any swap space.
The converters are written as follows.
Conv3d.py
from torch2trt.module_test import add_module_test
from torch2trt.torch2trt import *
@tensorrt_converter("torch.nn.Conv3d.forward")
def convert_Conv3d(ctx):
    """Map ``torch.nn.Conv3d.forward`` onto a TensorRT 3-D convolution layer.

    Reads the Conv3d module (bound ``self``) and its input tensor from
    ``ctx.method_args``, adds an ``add_convolution_nd`` layer to
    ``ctx.network``, and attaches the resulting TensorRT tensor to the
    PyTorch output as ``output._trt``.
    """
    module = ctx.method_args[0]  # the Conv3d instance (bound `self`)
    input = ctx.method_args[1]   # the tensor argument of forward()
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    def _triple(value):
        # Conv3d options may be stored as a scalar or a tuple; normalize
        # to a 3-tuple as required by the *_nd TensorRT setters below.
        return value if isinstance(value, tuple) else (value,) * 3

    kernel_size = _triple(module.kernel_size)
    stride = _triple(module.stride)
    padding = _triple(module.padding)
    dilation = _triple(module.dilation)

    kernel = module.weight.detach().cpu().numpy()

    # TensorRT needs a bias argument either way; default to an empty
    # Weights object of the weight's dtype when the module has no bias.
    bias = trt.Weights(torch_dtype_to_trt(module.weight.dtype))
    if module.bias is not None:
        bias = module.bias.detach().cpu().numpy()

    layer = ctx.network.add_convolution_nd(
        input=input_trt,
        num_output_maps=module.out_channels,
        kernel_shape=kernel_size,
        kernel=kernel,
        bias=bias,
    )
    layer.stride_nd = stride
    layer.padding_nd = padding
    layer.dilation_nd = dilation
    # Conv3d.groups is an int in PyTorch; the None check is kept defensively.
    if module.groups is not None:
        layer.num_groups = module.groups

    output._trt = layer.get_output(0)
@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 128, 128, 128)])
def test_Conv3d_basic():
    """Smoke test: 1x1x1 kernel, stride 1, no padding."""
    return torch.nn.Conv3d(10, 5, kernel_size=1, stride=1, padding=0)
@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 128, 128, 128)])
def test_Conv3d_stride2():
    """Exercises the stride_nd path: 1x1x1 kernel with stride 2."""
    return torch.nn.Conv3d(10, 5, kernel_size=1, stride=2, padding=0)
@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 128, 128, 128)])
def test_Conv3d_kernel3():
    """Exercises kernel_shape and padding_nd: 3x3x3 kernel, stride 2, padding 1."""
    return torch.nn.Conv3d(10, 5, kernel_size=3, stride=2, padding=1)
@add_module_test(torch.float32, torch.device("cuda"), [(1, 10, 128, 128, 128)])
def test_Conv3d_dilation2():
    """Exercises the dilation_nd path: 3x3x3 kernel with dilation 2."""
    return torch.nn.Conv3d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2)
BatchNorm3d.py
from torch2trt.torch2trt import *
@tensorrt_converter("torch.nn.BatchNorm3d.forward")
def convert_BatchNorm3d(ctx):
    """Map ``torch.nn.BatchNorm3d.forward`` onto a TensorRT per-channel scale layer.

    Inference-mode batch norm is an affine transform per channel,
    ``y = scale * x + bias`` (power fixed to 1), which is expressed here
    with ``add_scale_nd`` in CHANNEL mode.
    """
    module = ctx.method_args[0]  # the BatchNorm3d instance (bound `self`)
    input = ctx.method_args[1]   # the tensor argument of forward()
    input_trt = trt_(ctx.network, input)
    output = ctx.method_return

    # Fold running statistics and learned affine parameters into one
    # scale/bias pair.  NOTE(review): `np` is presumably re-exported by the
    # wildcard import of torch2trt.torch2trt — confirm.
    scale = module.weight.detach().cpu().numpy() / np.sqrt(
        module.running_var.detach().cpu().numpy() + module.eps
    )
    bias = module.bias.detach().cpu().numpy() - module.running_mean.detach().cpu().numpy() * scale
    power = np.ones_like(scale)

    # NOTE(review): the trailing 0 looks like the channel-axis argument of
    # add_scale_nd — confirm against the TensorRT Python API.
    layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 0)
    output._trt = layer.get_output(0)
AdaptiveAvgPool3d.py
from torch2trt.torch2trt import *
from torch2trt.module_test import add_module_test
@tensorrt_converter("torch.nn.AdaptiveAvgPool3d.forward")
def convert_AdaptiveAvgPool3d(ctx):
    """Map ``torch.nn.AdaptiveAvgPool3d.forward`` onto a TensorRT average-pooling layer.

    NOTE(review): the window/stride are derived by floor division, which
    reproduces AdaptiveAvgPool3d exactly only when each spatial dimension is
    evenly divisible by the corresponding output size — confirm behaviour
    for non-divisible shapes.
    """
    module = ctx.method_args[0]  # the AdaptiveAvgPool3d instance (bound `self`)
    input = ctx.method_args[1]   # the tensor argument of forward()
    output = ctx.method_return
    input_trt = trt_(ctx.network, input)

    # output_size may be a scalar or a 3-tuple; normalize to a 3-tuple.
    output_size = module.output_size
    if not isinstance(output_size, tuple):
        output_size = (output_size,) * 3

    # Emulate adaptive pooling with a fixed window: stride = floor(in / out)
    # per spatial dimension, window size equal to the stride.
    stride = (
        input_trt.shape[-3] // output_size[-3],
        input_trt.shape[-2] // output_size[-2],
        input_trt.shape[-1] // output_size[-1],
    )
    kernel_size = stride

    layer = ctx.network.add_pooling_nd(
        input=input_trt, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
    layer.stride_nd = stride

    output._trt = layer.get_output(0)
@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 128, 128, 128)])
def test_AdaptiveAvgPool3d_1x1():
    """Global pooling case: output size (1, 1, 1)."""
    return torch.nn.AdaptiveAvgPool3d((1, 1, 1))
@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 128, 128, 128)])
def test_AdaptiveAvgPool3d_2x2():
    """Evenly divisible case: 128 -> 2 per spatial dimension."""
    return torch.nn.AdaptiveAvgPool3d((2, 2, 2))
@add_module_test(torch.float32, torch.device("cuda"), [(1, 3, 128, 128, 128)])
def test_AdaptiveAvgPool3d_3x3():
    """Non-divisible case (128 / 3): stresses the floor-division emulation
    in convert_AdaptiveAvgPool3d."""
    return torch.nn.AdaptiveAvgPool3d((3, 3, 3))
Thanks.