Description
Hi, I am trying to use IResizeLayer to perform an interpolation op.
In NEAREST mode, the layer gives me some unexpected results.
The input is [1,0,1,0…] with shape [1,1,1,33], and the output shape is [1,1,1,66] (double the width).
I expect the result to be [1,1,0,0,1,1,0,0…], but the actual result is [1,1,1,0,0,1,1…]. There is an extra 1 at the beginning.
Environment
TensorRT Version: 7.1.3.4
GPU Type: 2070 Super
Nvidia Driver Version: 450.80.02
CUDA Version: 10.2
CUDNN Version: 8.0.4
Operating System + Version: ubuntu18.04
Python Version (if applicable): 3.7
TensorFlow Version (if applicable):
PyTorch Version (if applicable): 1.7.0
Baremetal or Container (if container which image + tag):
Steps To Reproduce
import tensorrt as trt
import torch
import numpy as np
def main():
    """Build a single-resize-layer TensorRT engine and print its output.

    The network takes a float32 input of shape ``[1, 1, 1, 33]`` and resizes
    it in NEAREST mode to twice the width.  The input alternates
    1, 0, 1, 0, ... along the last axis; the first 20 values of both the
    input and the output are printed so they can be compared.

    Raises:
        RuntimeError: if the engine cannot be built or the inference call
            reports failure.
    """
    input_size = [1, 1, 1, 33]

    print("create trt model")
    log_level = trt.Logger.ERROR
    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)

    ## build network
    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)

    input_name = 'input'
    output_name = 'output'
    input_trt = network.add_input(name=input_name,
                                  shape=input_size,
                                  dtype=trt.float32)

    # Resize to an explicit output shape: same leading dims, doubled width.
    layer = network.add_resize(input_trt)
    layer.shape = tuple(input_size[:3] + [input_size[3] * 2])
    layer.resize_mode = trt.ResizeMode.NEAREST

    output = layer.get_output(0)
    output.name = output_name
    network.mark_output(output)

    ## builder config
    max_workspace_size = 1 << 30
    fp16_mode = False

    # Builder-level settings kept exactly as in the original report.
    # NOTE(review): presumably superseded by the config-level settings below
    # when building with a config -- confirm before removing.
    builder.max_workspace_size = max_workspace_size
    builder.fp16_mode = fp16_mode

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size
    profile = builder.create_optimization_profile()

    # set shape (min == opt == max, i.e. a single fixed input shape)
    input_shape = input_size
    profile.set_shape(input_name, input_shape, input_shape, input_shape)
    config.add_optimization_profile(profile)

    if fp16_mode:
        config.set_flag(trt.BuilderFlag.FP16)

    # build engine
    engine = builder.build_engine(network, config)
    if engine is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("TensorRT failed to build the engine")
    context = engine.create_execution_context()

    print("inference")
    # Allocate directly on the GPU; every other element along the width is 1.
    input_torch = torch.zeros(input_size, dtype=torch.float32, device="cuda")
    input_torch[:, :, :, ::2] = 1

    bindings = [None] * 2

    # set input
    idx = engine.get_binding_index(input_name)
    context.set_binding_shape(idx, tuple(input_torch.shape))
    bindings[idx] = input_torch.data_ptr()

    # set output
    idx = engine.get_binding_index(output_name)
    shape = tuple(context.get_binding_shape(idx))
    output_torch = torch.empty(shape, dtype=torch.float32, device="cuda")
    bindings[idx] = output_torch.data_ptr()

    stream = torch.cuda.current_stream()
    if not context.execute_async_v2(bindings, stream.cuda_stream):
        raise RuntimeError("TensorRT execute_async_v2 reported failure")
    # The call above only enqueues work; wait for it before reading results.
    stream.synchronize()

    print("input:")
    print(input_torch.view(-1)[:20])
    print("output:")
    print(output_torch.view(-1)[:20])
# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()
Only tensors of size 33, 37, 41, 47, 55 (and their integer multiples) produce this result.
Why does this happen, and how can I fix it?
Thanks.