BUG: TensorRT 3.0 Python API did not change variable ownership to C++ in add_convolution(), add_scal...

songzhaozhe · January 12, 2018, 11:50am

Hi,

Based on my experiments, I suspect that the TensorRT 3.0 Python API does not transfer ownership of the weight arrays to C++ in functions such as add_convolution() and add_scale(). The steps to reproduce are as follows:

Simply move the part of the example code that adds a layer into a function, without changing anything else, like this:

def make_layer(weight_dict, pool1, network):
    """Add the conv2 layer to ``network``, fed by the first output of ``pool1``.

    The weights and bias are read from ``weight_dict`` under the keys
    'conv2.weight' and 'conv2.bias' and flattened to 1-D numpy arrays.
    Returns the new convolution layer together with ``network``.
    """
    # NOTE(review): conv_w / conv_b are locals, so this function drops its
    # references to them on return. The report suspects TensorRT still uses
    # their buffers afterwards -- confirm, and keep them referenced if so.
    conv_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1)
    conv_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
    # 50 output maps with a 5x5 kernel.
    conv = network.add_convolution(pool1.get_output(0), 50, (5,5), conv_w, conv_b)
    conv.set_stride((1,1))
    return conv, network

After the function returns, conv_w and conv_b are likely to be garbage-collected by Python, but I suspect the underlying C++ code still holds pointers to their data. As a result, the network outputs NaN/inf/0.

To work around this, I can pass a dict into the function and store the weights in it, so that they remain referenced after the function returns. Then everything works correctly, so I believe my guess is right.

Here is a complete script that reproduces the issue. It is almost identical to the example code; the only change is that conv2 is moved into a function:

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from matplotlib.pyplot import imshow #to show test case
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

# Training hyper-parameters and script settings.
BATCH_SIZE = 64
TEST_BATCH_SIZE = 1000
EPOCHS = 3
LEARNING_RATE = 0.001
SGD_MOMENTUM = 0.5
SEED = 1
# Only referenced by the commented-out progress logging in train().
LOG_INTERVAL = 10

# Seed the CUDA random number generator (this call does not enable CUDA itself).
torch.cuda.manual_seed(SEED)

# Data loaders: both read MNIST from /tmp/mnist/data and apply the same
# ToTensor + Normalize transform.
# NOTE(review): (0.1307,) / (0.3081,) are presumably the MNIST mean/std -- confirm.
kwargs = {'num_workers': 1, 'pin_memory': True}
# Training split: shuffled, downloaded on first use.
train_loader  = torch.utils.data.DataLoader(
    datasets.MNIST('/tmp/mnist/data', train=True, download=True,
                    transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))
        ])),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs)

# Test split: not shuffled. No download=True here; presumably relies on the
# download triggered by the training loader above.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('/tmp/mnist/data', train=False,
                   transform=transforms.Compose([
                   transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))
        ])),
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    **kwargs)

#Network
class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then two linear layers.

    ``forward`` flattens the second pooling output to 800 features per sample
    (50 channels x 4 x 4 for a 1x28x28 input) and returns log-probabilities
    over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
        # Defined but never called in forward(); kept so the attribute
        # remains available to existing callers.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(800, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10) for input ``x``."""
        # No activation follows the convolutions; only max pooling.
        x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
        x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)
        x = x.view(-1, 800)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # x is 2-D after view(); name the class dimension explicitly instead
        # of relying on the deprecated implicit-dimension choice.
        return F.log_softmax(x, dim=1)

# Instantiate the network and move it to the GPU.
model = Net()
model.cuda()

# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=SGD_MOMENTUM)

def train(epoch):
    """Run one pass over ``train_loader``, taking one optimizer step per batch.

    Uses the module-level ``model`` and ``optimizer``. ``epoch`` is only
    referenced by the disabled progress logging at the end of the loop body.
    """
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        # Move the batch to the GPU and wrap it for autograd.
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(images), labels)
        batch_loss.backward()
        optimizer.step()
        # Progress logging, currently disabled:
        #if step % LOG_INTERVAL == 0:
            #print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
            #      .format(epoch,
            #              step * len(images),
            #              len(train_loader.dataset),
            #              100. * step / len(train_loader),
            #              batch_loss.data[0]))

def test(epoch):
    """Evaluate the module-level ``model`` on ``test_loader`` and print a summary.

    Sums the per-batch NLL loss, divides it by ``len(test_loader)``, counts
    correct predictions, and prints both. ``epoch`` is accepted but not used.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    for images, labels in test_loader:
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda())
        scores = model(images)
        total_loss += F.nll_loss(scores, labels).data[0]
        # Index of the highest-scoring class for each sample.
        predicted = scores.data.max(1)[1]
        num_correct += predicted.eq(labels.data).cpu().sum()
    total_loss /= len(test_loader)
    dataset_size = len(test_loader.dataset)
    summary = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
    print(summary.format(total_loss,
                         num_correct,
                         dataset_size,
                         100. * num_correct / dataset_size))

# Alternate a training pass and an evaluation pass, numbering epochs from 1.
for epoch_number in range(1, EPOCHS + 1):
    train(epoch_number)
    test(epoch_number)

def make_layer(weight_dict, pool1, network):
    """Add the conv2 layer to ``network``, fed by the first output of ``pool1``.

    The weights and bias are read from ``weight_dict`` under the keys
    'conv2.weight' and 'conv2.bias' and flattened to 1-D numpy arrays.
    Returns the new convolution layer together with ``network``.
    """
    # NOTE(review): conv2_w / conv2_b are locals, so this function drops its
    # references to them on return. The report suspects TensorRT still uses
    # their buffers afterwards -- confirm, and keep them referenced if so.
    conv2_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1)
    conv2_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
    # 50 output maps with a 5x5 kernel.
    conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
    conv2.set_stride((1,1))
    return conv2, network

# Snapshot the trained parameters and also save them to the file 'weights'.
weights = model.state_dict()
torch.save(weights, 'weights')
# TensorRT logger (ERROR severity), builder, and an empty network definition.
G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
builder = trt.infer.create_infer_builder(G_LOGGER)
network = builder.create_network()
#Name for the input layer, data type, tuple for dimension
data = network.add_input("data", trt.infer.DataType.FLOAT, (1, 28, 28))
assert(data)
#-------------
# conv1: 20 output maps, 5x5 kernel, stride 1. The flattened weight arrays
# stay bound to script-level names for the rest of the script.
conv1_w = weights['conv1.weight'].cpu().numpy().reshape(-1)
conv1_b = weights['conv1.bias'].cpu().numpy().reshape(-1)
conv1 = network.add_convolution(data, 20, (5,5),  conv1_w, conv1_b)
assert(conv1)
conv1.set_stride((1,1))

#-------------
# pool1: 2x2 max pooling, stride 2.
pool1 = network.add_pooling(conv1.get_output(0), trt.infer.PoolingType.MAX, (2,2))
assert(pool1)
pool1.set_stride((2,2))
#-------------
# The inline conv2 construction from the original example is commented out
# and replaced by the make_layer() call below.
#conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
#conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
#conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
#conv2.set_stride((1,1))
# The conv2 weight arrays are created inside make_layer() and are not bound
# to any script-level name here.
conv2, network = make_layer(weights, pool1, network)
##-------------
# pool2: 2x2 max pooling, stride 2.
pool2 = network.add_pooling(conv2.get_output(0), trt.infer.PoolingType.MAX, (2,2))
assert(pool2)
pool2.set_stride((2,2))

#-------------
# fc1: fully connected layer with 500 outputs.
fc1_w = weights['fc1.weight'].cpu().numpy().reshape(-1)
fc1_b = weights['fc1.bias'].cpu().numpy().reshape(-1)
fc1 = network.add_fully_connected(pool2.get_output(0), 500, fc1_w, fc1_b)
assert(fc1)

#-------------
# ReLU on the fc1 output.
relu1 = network.add_activation(fc1.get_output(0), trt.infer.ActivationType.RELU)
assert(relu1)

#-------------
# fc2: fully connected layer with 10 outputs.
fc2_w = weights['fc2.weight'].cpu().numpy().reshape(-1)
fc2_b = weights['fc2.bias'].cpu().numpy().reshape(-1)
fc2 = network.add_fully_connected(relu1.get_output(0), 10, fc2_w, fc2_b)
assert(fc2)

# The fc2 output is named "prob" and marked as the network output; no
# softmax layer is added after it.
fc2.get_output(0).set_name("prob")
network.mark_output(fc2.get_output(0))

# Build settings: batch size 1; workspace 1 << 20 = 1048576 (presumably bytes -- confirm).
builder.set_max_batch_size(1)
builder.set_max_workspace_size(1 << 20)

# Build the engine, wrap its serialized form in a trt.lite.Engine, then
# destroy the network, the builder and the original engine.
ori_engine = builder.build_cuda_engine(network)
engine = trt.lite.Engine(engine_stream=ori_engine.serialize(), max_batch_size=1)
network.destroy()
builder.destroy()
ori_engine.destroy()

# Run inference on the first sample of the first test batch and compare the
# argmax of the output with the expected label.
img, target = next(iter(test_loader))
img = img.numpy()[0]
target = target.numpy()[0]
print("Test Case: " + str(target))
print(img.shape)
output = engine.infer(img)[0]
print(output.shape)
print(output)
print("Test Case: " + str(target))
print ("Prediction: " + str(np.argmax(output[0])))

AastaLLL · January 15, 2018, 8:17am

Hi,

Could you share more details about your issue?
Our understanding is that you want to pass weight values to network.add_convolution but get incorrect output. Is this correct?

Instead of using a dict, you can pass the weights directly as a numpy array.
Here is some pseudocode for your reference:

...
# Flat float32 array of 3*3*3*2 = 54 random weights.
# NOTE(review): presumably 3 input channels x 3x3 kernel x 2 output maps -- confirm.
wt = np.random.randn(3*3*3*2).astype(np.float32)
# Two float32 zeros; presumably one bias per output map -- confirm.
bias = np.zeros(2, dtype=np.float32)
# The numpy arrays are passed directly as the weight and bias arguments.
conv0 = network.add_convolution(data, 2, (3, 3), wt, bias)
...

Please feel free to let us know if we have misunderstood your question.
Thanks.

songzhaozhe · January 15, 2018, 8:51am

Hi,

Sorry for not stating this clearly. This issue arises when I try to restructure my code into functions. The example I used to illustrate the issue is the official example here: API Reference :: NVIDIA Deep Learning TensorRT Documentation

The original code works fine, but when I change it slightly, the output is NaN, which is really strange. The change is as follows:

Original:

weights = model.state_dict()
...
network = builder.create_network()
...
# conv2 is built inline, so conv2_w and conv2_b stay bound to these names
# after add_convolution() returns.
conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
conv2.set_stride((1,1))

My change:

def make_layer(weight_dict, pool1, network):
  """Add the conv2 layer to ``network``, fed by the first output of ``pool1``.

  Returns the new convolution layer together with ``network``.
  """
  # NOTE(review): conv2_w / conv2_b are locals, so this function drops its
  # references to them on return -- the suspected cause of the bad output.
  conv2_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1) 
  conv2_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
  conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
  conv2.set_stride((1,1))
  return conv2, network

weights = model.state_dict()
...
network = builder.create_network()
...
# These lines are commented out and moved into make_layer():
#conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
#conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
#conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
#conv2.set_stride((1,1))
# Same layer construction as above, but performed inside the function.
conv2, network = make_layer(weights, pool1, network)

After my change, the final output probabilities become NaN/inf, which is strange because the logic is exactly the same.

Thus, I suspect that conv2_w and conv2_b are destroyed after the function returns because of an ownership bug, leaving the network with invalid weights.

Thank you!

songzhaozhe · January 18, 2018, 7:56am

Hi,

I have reformatted the code in my earlier posts using code formatting. I think the issue should be quite easy to reproduce from the example code.

Please let me know if any information is unclear.

Thanks!

AastaLLL · January 18, 2018, 8:46am

Thanks for the feedback.

We are discussing this issue internally, and it may take some time.
We will update you when we have more information.

Thanks.

Topic		Replies	Views
Got '-inf' and 'nan' result in ConvolutionLayer with TensorRT 3 Jetson TX2	8	2897	September 11, 2022
TensorRT's nvinfer1::INetworkDefinition::addFullyConnected() does not work as expected for C3D network DeepStream SDK tensorrt	29	2176	December 2, 2020
TensorRT 3.0 Python API addConcatenation function need a ITensor const parameter which cannot be ... Jetson TX2	6	1177	February 27, 2018
About the incompitvle arguments type in tensorrt python API,where creating a network from scratch TensorRT	6	2468	March 21, 2019
TensorRT get different result in python and c++ TensorRT	21	3274	August 24, 2022
Kernel weights has count 2304 but 32640 was expected Jetson TX2 tensorrt , nvbugs	22	4845	May 12, 2022
3D conv result different in PyTorch and TensorRT TensorRT	2	743	July 27, 2020
Down performance when add layer to TensorRT convert from ONNX TensorRT	2	467	June 19, 2020
Inference results are partially weird TensorRT tensorrt , opencv , cuda	3	892	November 6, 2021
Get warning "Bias weights are not set yet" when converting caffe model with custom plugin TensorRT tensorrt	7	718	March 23, 2021

BUG: TensorRT 3.0 Python API did not change variable ownership to C++ in add_convolution(), add_scal...

Related topics