BUG: TensorRT 3.0 Python API did not change variable ownership to C++ in add_convolution(), add_scal...

Hi,

From my experiments, it appears that the TensorRT 3.0 Python API does not transfer ownership of the weight arrays to C++ in functions such as add_convolution() and add_scale(). The steps to reproduce are as follows:

Simply move the part of the example code that adds a layer into a function, without changing anything else, like this:

def make_layer(weight_dict, pool1, network):
    """Add the conv2 convolution layer to ``network`` on top of ``pool1``.

    Reads 'conv2.weight' and 'conv2.bias' from ``weight_dict``, flattens each
    to a 1-D numpy array, and passes them to ``network.add_convolution``.
    Returns the new layer together with the same ``network`` object.

    NOTE(review): ``conv_w`` and ``conv_b`` are locals and nothing in this
    function keeps another reference to them, so they can be freed once the
    function returns. The report's hypothesis is that TensorRT still points
    at that memory afterwards - to be confirmed against the TensorRT docs.
    """
    # .cpu().numpy().reshape(-1): host-side numpy array, flattened to 1-D.
    conv_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1)
    conv_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
    # 50 and (5,5) presumably are the output-map count and kernel size - TODO confirm.
    conv = network.add_convolution(pool1.get_output(0), 50, (5,5), conv_w, conv_b)
    conv.set_stride((1,1))
    # Only the layer and the network are returned; the weight arrays are not.
    return conv, network

After the function returns, conv_w and conv_b will likely be garbage-collected by Python, but I suspect the underlying C++ object still holds pointers to their memory. As a result, the network outputs NaN/inf/0.

To work around this, I can pass a dict into the function and store the weight arrays in that dict so that they stay alive. Then everything works correctly, which makes me believe my guess is right.

Here is the complete code to reproduce the issue; it is almost identical to the example code (only conv2 has been moved into a function):

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from matplotlib.pyplot import imshow #to show test case
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

# Hyper-parameters and settings for the MNIST training script below.
BATCH_SIZE = 64          # batch size of train_loader
TEST_BATCH_SIZE = 1000   # batch size of test_loader
EPOCHS = 3
LEARNING_RATE = 0.001
SGD_MOMENTUM = 0.5
SEED = 1                 # passed to torch.cuda.manual_seed below
LOG_INTERVAL = 10

# Seed the CUDA random number generator.
torch.cuda.manual_seed(SEED)

# Data loaders: keyword arguments shared by both DataLoader instances.
kwargs = {'num_workers': 1, 'pin_memory': True}
# Training split; download=True fetches MNIST into /tmp/mnist/data if needed.
# The Normalize constants are presumably the MNIST mean/std - TODO confirm.
train_loader  = torch.utils.data.DataLoader(
    datasets.MNIST('/tmp/mnist/data', train=True, download=True,
                    transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))
        ])),
    batch_size=BATCH_SIZE,
    shuffle=True,
    **kwargs)

# Test split: same transform as above, no download flag, no shuffling.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('/tmp/mnist/data', train=False,
                   transform=transforms.Compose([
                   transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,))
        ])),
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    **kwargs)

#Network
class Net(nn.Module):
    """Small convolutional classifier with ten output classes.

    Layout: conv1 (1 -> 20 channels, 5x5) -> 2x2 max-pool -> conv2
    (20 -> 50 channels, 5x5) -> 2x2 max-pool -> fc1 (800 -> 500) -> ReLU ->
    fc2 (500 -> 10) -> log-softmax.

    For 1x28x28 inputs the feature map after the second pooling is 50x4x4,
    which is the 800 features expected by ``fc1``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
        # Declared but never applied in forward(); kept so the module's
        # attribute set stays unchanged.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(800, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return per-class log-probabilities, one row of 10 per sample."""
        x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
        x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)
        # Flatten to (batch, 800) for the fully connected layers.
        x = x.view(-1, 800)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # x is 2-D here, so normalise over the class axis explicitly instead
        # of relying on the deprecated implicit-dimension choice.
        return F.log_softmax(x, dim=1)

# Instantiate the network and move its parameters to the GPU.
model = Net()
model.cuda()

# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=SGD_MOMENTUM)

def train(epoch):
    """Run one optimisation pass over ``train_loader`` on the global ``model``.

    ``epoch`` is only used by the disabled progress logging at the end of
    the loop body. Nothing is returned; ``model`` is updated in place through
    the global ``optimizer``.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        # Move the batch to the GPU and wrap it for autograd.
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()
        # Progress logging (disabled):
        #if step % LOG_INTERVAL == 0:
            #print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
            #      .format(epoch,
            #              step * len(inputs),
            #              len(train_loader.dataset),
            #              100. * step / len(train_loader),
            #              loss.data[0]))

def test(epoch):
    """Evaluate the global ``model`` on ``test_loader`` and print a summary.

    Sums the per-batch NLL loss and the number of correct predictions, then
    prints the loss divided by the number of batches and the accuracy over
    the whole test dataset. ``epoch`` is accepted but not used.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    for inputs, labels in test_loader:
        # volatile=True: inference only, no autograd history is needed.
        inputs = Variable(inputs.cuda(), volatile=True)
        labels = Variable(labels.cuda())
        log_probs = model(inputs)
        total_loss += F.nll_loss(log_probs, labels).data[0]
        # Index of the highest log-probability along dimension 1.
        predictions = log_probs.data.max(1)[1]
        num_correct += predictions.eq(labels.data).cpu().sum()

    # Divide by the number of batches, not the number of samples.
    mean_loss = total_loss / len(test_loader)
    summary = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
    print(summary.format(mean_loss,
                         num_correct,
                         len(test_loader.dataset),
                         100. * num_correct / len(test_loader.dataset)))

# Train and evaluate for EPOCHS epochs; epochs are passed 1-based.
for e in range(EPOCHS):
    train(e + 1)
    test(e + 1)

def make_layer(weight_dict, pool1, network):
    """Add the conv2 convolution layer to ``network`` on top of ``pool1``.

    Reads 'conv2.weight' and 'conv2.bias' from ``weight_dict``, flattens each
    to a 1-D numpy array, and passes them to ``network.add_convolution``.
    Returns the new layer together with the same ``network`` object.

    NOTE(review): ``conv2_w`` and ``conv2_b`` are locals and nothing in this
    function keeps another reference to them, so they can be freed once the
    function returns. The report's hypothesis is that TensorRT still points
    at that memory when the engine is built - to be confirmed.
    """
    # .cpu().numpy().reshape(-1): host-side numpy array, flattened to 1-D.
    conv2_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1)
    conv2_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
    # 50 and (5,5) match nn.Conv2d(20, 50, kernel_size=5) in the PyTorch model.
    conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
    conv2.set_stride((1,1))
    # Only the layer and the network are returned; the weight arrays are not.
    return conv2, network

# Export the trained PyTorch parameters and also save them to the file 'weights'.
weights = model.state_dict()
torch.save(weights, 'weights')
# TensorRT logger, builder and an empty network definition.
G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
builder = trt.infer.create_infer_builder(G_LOGGER)
network = builder.create_network()
#Name for the input layer, data type, tuple for dimension
data = network.add_input("data", trt.infer.DataType.FLOAT, (1, 28, 28))
assert(data)
#-------------
# conv1: the flattened weight arrays are module-level names here, so they
# stay referenced until the end of the script.
conv1_w = weights['conv1.weight'].cpu().numpy().reshape(-1)
conv1_b = weights['conv1.bias'].cpu().numpy().reshape(-1)
conv1 = network.add_convolution(data, 20, (5,5),  conv1_w, conv1_b)
assert(conv1)
conv1.set_stride((1,1))

#-------------
# pool1: 2x2 max pooling with stride 2 on the conv1 output.
pool1 = network.add_pooling(conv1.get_output(0), trt.infer.PoolingType.MAX, (2,2))
assert(pool1)
pool1.set_stride((2,2))
#-------------
# conv2: the inline version is commented out and replaced by make_layer().
# This is the only difference from the original example; inside make_layer()
# the conv2 weight arrays are function locals.
#conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
#conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
#conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
#conv2.set_stride((1,1))
conv2, network = make_layer(weights, pool1, network)
##-------------
# pool2: 2x2 max pooling with stride 2 on the conv2 output.
pool2 = network.add_pooling(conv2.get_output(0), trt.infer.PoolingType.MAX, (2,2))
assert(pool2)
pool2.set_stride((2,2))

#-------------
# fc1: fully connected layer with 500 outputs.
fc1_w = weights['fc1.weight'].cpu().numpy().reshape(-1)
fc1_b = weights['fc1.bias'].cpu().numpy().reshape(-1)
fc1 = network.add_fully_connected(pool2.get_output(0), 500, fc1_w, fc1_b)
assert(fc1)

#-------------
# ReLU on the fc1 output.
relu1 = network.add_activation(fc1.get_output(0), trt.infer.ActivationType.RELU)
assert(relu1)

#-------------
# fc2: fully connected layer with 10 outputs.
fc2_w = weights['fc2.weight'].cpu().numpy().reshape(-1)
fc2_b = weights['fc2.bias'].cpu().numpy().reshape(-1)
fc2 = network.add_fully_connected(relu1.get_output(0), 10, fc2_w, fc2_b)
assert(fc2)

# The fc2 output is named "prob" and marked as the network output; no softmax
# layer is added here.
fc2.get_output(0).set_name("prob")
network.mark_output(fc2.get_output(0))

# Build settings: batch size 1, workspace of 1 << 20 bytes (1 MiB).
builder.set_max_batch_size(1)
builder.set_max_workspace_size(1 << 20)

# Build the engine, hand its serialized form to trt.lite.Engine, then release
# the build-time objects.
ori_engine = builder.build_cuda_engine(network)
engine = trt.lite.Engine(engine_stream=ori_engine.serialize(), max_batch_size=1)
network.destroy()
builder.destroy()
ori_engine.destroy()

# Run inference on the first image of the first test batch.
img, target = next(iter(test_loader))
img = img.numpy()[0]
target = target.numpy()[0]
print("Test Case: " + str(target))
print(img.shape)
output = engine.infer(img)[0]
print(output.shape)
print(output)
print("Test Case: " + str(target))
# Predicted class: index of the largest value in the first output row.
print ("Prediction: " + str(np.argmax(output[0])))

Hi,

Could you share more about your issue?
I guess that you want to pass weight values to network.add_convolution but are getting incorrect output. Is this correct?

Instead of using a dict, you can pass the weights directly as numpy arrays.
Here is some pseudocode for your reference:

...
# 3*3*3*2 = 54 random float32 weights, as a flat array; presumably
# 3 input channels x (3x3 kernel) x 2 output maps - TODO confirm.
wt = np.random.randn(3*3*3*2).astype(np.float32)
# Two zero float32 biases, matching the 2 passed to add_convolution below.
bias = np.zeros(2, dtype=np.float32)
conv0 = network.add_convolution(data, 2, (3, 3), wt, bias)
...

Please feel free to let us know if we misunderstand your question.
Thanks.

Hi,

Sorry for not stating this clearly. This issue arises when I try to restructure my code into functions. The example I used to illustrate the issue is the official example here: API Reference :: NVIDIA Deep Learning TensorRT Documentation

The original code works fine, but after a small change the output becomes NaN, which is really strange. The change is as follows:

Original:

weights = model.state_dict()
...
network = builder.create_network()
...
# conv2_w / conv2_b are module-level names in this version, so the arrays
# stay referenced for the rest of the script.
conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
conv2.set_stride((1,1))

My change:

def make_layer(weight_dict, pool1, network):
  """Add the conv2 convolution layer to ``network`` and return (layer, network).

  NOTE(review): ``conv2_w`` and ``conv2_b`` are locals with no other
  reference kept here, so they can be freed once the function returns.
  """
  # Host-side numpy arrays, flattened to 1-D.
  conv2_w = weight_dict['conv2.weight'].cpu().numpy().reshape(-1) 
  conv2_b = weight_dict['conv2.bias'].cpu().numpy().reshape(-1)
  conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
  conv2.set_stride((1,1))
  # The weight arrays are not part of the return value.
  return conv2, network

weights = model.state_dict()
...
network = builder.create_network()
...
# The inline conv2 construction is commented out; the same statements now run
# inside make_layer().
#conv2_w = weights['conv2.weight'].cpu().numpy().reshape(-1)
#conv2_b = weights['conv2.bias'].cpu().numpy().reshape(-1)
#conv2 = network.add_convolution(pool1.get_output(0), 50, (5,5), conv2_w, conv2_b)
#conv2.set_stride((1,1))
conv2, network = make_layer(weights, pool1, network)

After my change, the final output probabilities become NaN/inf, which is strange because the logic is exactly the same.

Thus, I suspect that conv2_w and conv2_b were destroyed after the function returned because of an ownership bug, leaving the weights invalid.

Thank you!

Hi,

I have formatted the code in my earlier posts using code formatting. I think it should be quite easy to reproduce the issue from the example code.

Please let me know if any information is unclear.

Thanks!

Thanks for the feedback.

We are discussing this issue internally, and it may take some time.
We will update you when we have more information.

Thanks.