ERROR: python3.6: cuda/cudaRNNBaseLayer.cpp:400: nvinfer1::rt::cuda::RNNBaseLayer::RNNDescriptorState::RNNDescriptorState(const nvinfer1::rt::cuda::RNNBaseLayer&, const nvinfer1::rt::CommonContext&): Assertion `filterDims[0] == l.getTotalNbWeights

While building a tensor rt engine for bidirectional lstm I observed this error, any ideas what this could be indicative of? what does the error mean?

The error is as follows:

python3.6: cuda/cudaRNNBaseLayer.cpp:400: nvinfer1::rt::cuda::RNNBaseLayer::RNNDescriptorState::RNNDescriptorState(const nvinfer1::rt::cuda::RNNBaseLayer&, const nvinfer1::rt::CommonContext&): Assertion `filterDims[0] == l.getTotalNbWeights()’ failed.

Here is my env:

  1. Linux version: Ubuntu 18.04
  2. GPU: GTX745
  3. Nvidia driver version: 430.26
  4. CUDA version: 10.1
  5. CUDNN version: 7
  6. TensorRT version: 5.1.5.0
  7. python3.6

Here is my demo:

#!/usr/bin python
# -*- coding: utf-8 -*-
import tensorrt as trt
import numpy as np
import common

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def populate_BiLSTM(network):
    hidden_size = 256
    input_size_x = 256

    rnn_input = network.add_input(name='rnn_before', dtype=trt.float32, shape=(1,128,256))
    rnn = network.add_rnn_v2(input=rnn_input, layer_count=2, hidden_size=hidden_size, max_seq_length=128,
                             op=trt.RNNOperation.LSTM)
    rnn.input_mode = trt.RNNInputMode.SKIP
    rnn.direction = trt.RNNDirection.BIDIRECTION
    gate_order = [trt.RNNGateType.INPUT, trt.RNNGateType.CELL, trt.RNNGateType.FORGET, trt.RNNGateType.OUTPUT]

    weights_x = np.zeros((input_size_x, hidden_size), dtype=np.float32)
    bias_x = np.zeros((hidden_size,), dtype=np.float32)
    weights_h = np.zeros((hidden_size, hidden_size), dtype=np.float32)
    bias_h = np.zeros((hidden_size,), dtype=np.float32)

    weights_x_2 = np.ones((input_size_x*2, hidden_size), dtype=np.float32)
    bias_x_2 = np.ones((hidden_size,), dtype=np.float32)

    weights_h_2 = np.ones((hidden_size, hidden_size), dtype=np.float32)
    bias_h_2 = np.ones((hidden_size,), dtype=np.float32)
    for g in gate_order:
        rnn.set_weights_for_gate(layer_index=0, gate=g, is_w=True, weights=weights_x)
        rnn.set_bias_for_gate(layer_index=0, gate=g, is_w=True, bias=bias_x)
        rnn.set_weights_for_gate(layer_index=0, gate=g, is_w=False, weights=weights_h)
        rnn.set_bias_for_gate(layer_index=0, gate=g, is_w=False, bias=bias_h)

        rnn.set_weights_for_gate(layer_index=1, gate=g, is_w=True, weights=weights_x)
        rnn.set_bias_for_gate(layer_index=1, gate=g, is_w=True, bias=bias_x)
        rnn.set_weights_for_gate(layer_index=1, gate=g, is_w=False, weights=weights_h)
        rnn.set_bias_for_gate(layer_index=1, gate=g, is_w=False, bias=bias_h)

        rnn.set_weights_for_gate(layer_index=2, gate=g, is_w=True, weights=weights_x_2)
        rnn.set_bias_for_gate(layer_index=2, gate=g, is_w=True, bias=bias_x_2)
        rnn.set_weights_for_gate(layer_index=2, gate=g, is_w=False, weights=weights_h_2)
        rnn.set_bias_for_gate(layer_index=2, gate=g, is_w=False, bias=bias_h_2)

        rnn.set_weights_for_gate(layer_index=3, gate=g, is_w=True, weights=weights_x_2)
        rnn.set_bias_for_gate(layer_index=3, gate=g, is_w=True, bias=bias_x_2)
        rnn.set_weights_for_gate(layer_index=3, gate=g, is_w=False, weights=weights_h)
        rnn.set_bias_for_gate(layer_index=3, gate=g, is_w=False, bias=bias_x_2)
    print(rnn, rnn.get_output(0).shape)
    rnn.get_output(0).name = 'output'
    network.mark_output(tensor=rnn.get_output(0))

def get_engine():
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        builder.max_batch_size = 1
        # Populate the network using weights.
        populate_BiLSTM(network)
        print('network', network)
        # Build and return an engine.
        engine = builder.build_cuda_engine(network)
        print('engine', engine)
        return engine

def main():
    if get_engine() is None:
        print('fail')
    else:
        print('success')

if __name__ == '__main__':
    main()