Could not find any supported formats consistent with input/output data types


I am trying to test "CustomSkipLayerNormPluginDynamic" with int8. I can build the engine for fp16/fp32, but for int8 the build fails with: [PluginV2DynamicExt]: could not find any supported formats consistent with input/output data types.


docker image: tensorrt-ubuntu20.04-cuda12.0:latest

GPU Type: A30

Relevant Files

Please attach or include links to any models, data, files, or scripts necessary to reproduce your issue. (Github repo, Google Drive, Dropbox, etc.)

Steps To Reproduce


trtuser@2107802eea6a:/workspace/TensorRT/demo/plugin_test$ python
[04/11/2023-07:43:06] [TRT] [I] [MemUsageChange] Init CUDA: CPU +455, GPU +0, now: CPU 478, GPU 15798 (MiB)
[04/11/2023-07:43:35] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU +1368, GPU +302, now: CPU 1923, GPU 16100 (MiB)
[04/11/2023-07:43:35] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation DeprecationWarning: Use build_serialized_network instead.
  engine = builder.build_engine(network, builder_config)
[04/11/2023-07:43:37] [TRT] [I] Graph optimization time: 7.6554e-05 seconds.
[04/11/2023-07:43:44] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +1711, GPU +370, now: CPU 3681, GPU 16482 (MiB)
[04/11/2023-07:43:46] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +251, GPU +58, now: CPU 3932, GPU 16540 (MiB)
[04/11/2023-07:43:46] [TRT] [I] Timing cache disabled. Turning it on will improve builder speed.
[04/11/2023-07:43:46] [TRT] [E] 9: (Unnamed Layer* 0) [PluginV2DynamicExt]: could not find any supported formats consistent with input/output data types
[04/11/2023-07:43:46] [TRT] [E] 9: [pluginV2Builder.cpp::reportPluginError::24] Error Code 9: Internal Error ((Unnamed Layer* 0) [PluginV2DynamicExt]: could not find any supported formats consistent with input/output data types)
Traceback (most recent call last):
  File "", line 114, in <module>
  File "", line 106, in main
    serialized_engine = engine.serialize()
AttributeError: 'NoneType' object has no attribute 'serialize'


import argparse
import ctypes
import json
import numpy as np
import os
import os.path
import re
import sys
import time
import onnx
import pycuda.autoinit
import pdb
# TensorRT
import tensorrt as trt
sys.path.insert(0, '/workspace/TensorRT/demo/BERT')
from helpers.calibrator import BertCalibrator as BertCalibrator

# --- TensorRT initialization -------------------------------------------------
# SYNTAX FIX: the bare heading "TensorRT Initialization" was not valid Python;
# it is now a comment.
# NOTE(review): CUDA_VISIBLE_DEVICES must be set before the CUDA context is
# created, but `import pycuda.autoinit` above has already initialized CUDA by
# this point, so this assignment likely has no effect — move it above the
# pycuda import. TODO confirm.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt_version = [int(n) for n in trt.__version__.split('.')]

# Load the TensorRT plugin library so the custom BERT plugins register
# themselves with the plugin registry.
# BUG FIX: the non-Windows branch was an empty string, so ctypes.CDLL("")
# could not load the plugin library; on Linux it is libnvinfer_plugin.so.
plugin_lib_name = "nvinfer_plugin.dll" if sys.platform == "win32" else "libnvinfer_plugin.so"
env_name_to_add_path = "PATH" if sys.platform == "win32" else "LD_LIBRARY_PATH"
handle = ctypes.CDLL(plugin_lib_name, mode=ctypes.RTLD_GLOBAL)
if not handle:
    raise RuntimeError("Could not load plugin library. Is `{}` on your {}?".format(plugin_lib_name, env_name_to_add_path))

trt.init_libnvinfer_plugins(TRT_LOGGER, "")
plg_registry = trt.get_plugin_registry()
skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "1", "")

# Problem dimensions: batch size, sequence length, hidden size.
bs = 10
seq = 512
h = 768
# dtype selects the precision of the network inputs fed to the plugin.
# dtype = trt.float32
# dtype = trt.float16
dtype = trt.DataType.INT8

def skipln(init_dict, network, input_tensor, skip, bias=None):
    """Add a CustomSkipLayerNormPluginDynamic layer (skip-add + LayerNorm).

    Args:
        init_dict: dict with "beta" and "gamma" trt.Weights for the LayerNorm.
        network: trt.INetworkDefinition to add the layer to.
        input_tensor: primary input tensor; hidden size is taken from shape[2].
        skip: residual/skip input tensor (same shape as input_tensor).
        bias: optional trt.Weights bias added before normalization.

    Returns:
        The created plugin layer (IPluginV2Layer).
    """
    # SYNTAX FIX: the summary line above had lost its docstring quotes.
    idims = input_tensor.shape
    hidden_size = idims[2]

    pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32)
    wbeta = init_dict["beta"]
    pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32)
    wgamma = init_dict["gamma"]
    pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32)
    # type_id tells the plugin which precision to configure for.
    pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32)

    fields = [pf_ld, pf_beta, pf_gamma, pf_type]

    # BUG FIX: pf_bias was constructed but never appended to `fields`, so a
    # supplied bias was silently dropped. Also test `is not None` explicitly
    # rather than relying on trt.Weights truthiness.
    if bias is not None:
        pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32)
        fields.append(pf_bias)

    pfc = trt.PluginFieldCollection(fields)
    skipln_plug = skln_plg_creator.create_plugin("skipln", pfc)

    skipln_inputs = [input_tensor, skip]
    layer = network.add_plugin_v2(skipln_inputs, skipln_plug)
    return layer

def build_engine(init_dict):
    """Build a TensorRT engine containing only the skip-layernorm plugin layer.

    Args:
        init_dict: dict with "beta"/"gamma" trt.Weights, forwarded to skipln().

    Returns:
        The built trt.ICudaEngine, or None if the build failed.
    """
    explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(explicit_batch_flag) as network, \
         builder.create_builder_config() as builder_config:
        # SYNTAX FIX: the fp16 branch had an empty body (the flag line was
        # lost in the paste); enable the matching precision flag.
        if dtype == trt.float16:
            builder_config.set_flag(trt.BuilderFlag.FP16)
        if dtype == trt.DataType.INT8:
            # BUG FIX: int8 builds require the INT8 builder flag in addition
            # to a calibrator; without it the builder rejects int8 formats.
            builder_config.set_flag(trt.BuilderFlag.INT8)
            calibrationCacheFile = "BertSquadL{}H{}A{}S{}CalibCache".format(1, h, 1, seq)
            calibrator = BertCalibrator('/workspace/TensorRT/demo/BERT/squad/dev-v1.1.json', '/workspace/TensorRT/demo/BERT/models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt', calibrationCacheFile, 1, seq, 100)
            builder_config.int8_calibrator = calibrator
        builder_config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED
        input_tensor0 = network.add_input(name="input_tensor0", dtype=dtype, shape=(bs, seq, h))
        # SYNTAX FIX: the comma between dtype=dtype and shape=... was missing.
        input_tensor1 = network.add_input(name="input_tensor1", dtype=dtype, shape=(bs, seq, h))
        skiplayer = skipln(init_dict, network, input_tensor0, input_tensor1, None)
        # skiplayer.set_output_type(0, trt.DataType.FLOAT)
        out = skiplayer.get_output(0)
        # BUG FIX: a network with no marked outputs cannot be built; this line
        # was presumably lost in the paste — TODO confirm against the original.
        network.mark_output(out)
        # NOTE(review): builder.build_engine is deprecated in favor of
        # build_serialized_network (see the warning in the log); kept here so
        # main()'s engine.serialize() call still works unchanged.
        engine = builder.build_engine(network, builder_config)
    return engine

def main():
    """Build, serialize, and save the skip-layernorm test engine to disk."""
    # SYNTAX FIX: the `init_dict = {` / `}` wrapper around the dict literal
    # was lost in the paste; random beta/gamma of shape (1, 1, h) restored.
    init_dict = {
        'beta': trt.Weights(np.ascontiguousarray(np.float32(np.random.rand(1, 1, h)))),
        'gamma': trt.Weights(np.ascontiguousarray(np.float32(np.random.rand(1, 1, h)))),
    }
    engine = build_engine(init_dict)
    # BUG FIX: build_engine() returns None on failure; fail with a clear error
    # instead of the AttributeError seen in the reported traceback.
    if engine is None:
        raise RuntimeError("Engine build failed; see TensorRT log output above.")
    TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...")
    serialized_engine = engine.serialize()
    engine_path = './ln{0}_{1}_{2}.engine'.format(bs, seq, h)
    TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(engine_path))
    with open(engine_path, "wb") as fout:
        # SYNTAX FIX: the with-block body was missing; write the plan file.
        fout.write(serialized_engine)
    TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.")

if __name__ == "__main__":
    # SYNTAX FIX: the guard had an empty body; invoke the entry point.
    main()
Please refer to below links related custom plugin implementation and sample:

While the IPluginV2 and IPluginV2Ext interfaces are still supported for backward compatibility with TensorRT 5.1 and 6.0.x respectively, we recommend that you write new plugins, or refactor existing ones, to target the IPluginV2DynamicExt or IPluginV2IOExt interfaces instead.