[TensorRT] INTERNAL ERROR: Assertion failed: d.nbDims >= 1 int8

I got this when try calibarate a int8 trt engine.

my onnx model can convert to trt engine using onnx2trt, while calibration, error like this got:

[TensorRT] INTERNAL ERROR: Assertion failed: d.nbDims >= 1

Does any body got similar issue? It maybe caused by Gather element issue in onnx model? Since one of it’s inputs could be a Scalar value without any shape?

Hi @LucasJin,

Can you please help us with your model and script so that we can debug the issue.
Thanks!

Same error for me, model is slightly modified bert-base (bert_256.onnx.zip - Google Drive), conversion script:

import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import pickle
import os


class Int8Calibrator(trt.IInt8MinMaxCalibrator):
def __init__(self):
    super().__init__()
    self.cache_file = 'int8_calibrator_cache.bin'
    self.inputs = pickle.load(open("10_batches_bs_256.pickle", "rb"))
    self.next_id = 0
    self.gpu_arrays = {}


def get_batch(self, names):
    print(f"get batch {names}")
    if self.next_id >= len(self.inputs):
        return None
    
    input_ids, input_mask, segment_ids = self.inputs[self.next_id]
    self.next_id += 1
    
    result = []
    for input_name in names:
        if input_name == "input_ids":
            tensor = input_ids
        elif input_name == "segment_ids":
            tensor = segment_ids
        elif input_name == "input_mask":
            tensor = input_mask
        else:
            print(f"Wring input name: {input_name}")

        if input_name not in self.gpu_arrays:
            self.gpu_arrays[input_name] = cuda.mem_alloc(tensor.nbytes)

        cuda.memcpy_htod(self.gpu_arrays[input_name], tensor)
        result.append(self.gpu_arrays[input_name])

    return result


def get_batch_size(self):
    print("get batch size")
    return 1

def read_calibration_cache(self):
    print("read cache")
    if os.path.exists(self.cache_file):
        with open(self.cache_file, "rb") as f:
            return f.read()
    print("read cache done")

def write_calibration_cache(self, cache):
    print("Write cache")
    with open(self.cache_file, "wb") as f:
        f.write(cache)


def build_engine(model_file, fp16=False, int8=False):
    print("building engine")
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    builder.fp16_mode = fp16
    builder.int8_mode = int8

    config = builder.create_builder_config()
    config.max_workspace_size = 8 * (1024 ** 3)  ## 8 GB
    #if fp16:
    #    config.flags |= 1 << int(trt.BuilderFlag.FP16)

    config.int8_calibrator = Int8Calibrator()
    config.set_flag(trt.BuilderFlag.INT8)
    #config.set_flag(trt.BuilderFlag.STRICT_TYPES)

    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(model_file, 'rb') as model:
            parsed = parser.parse(model.read())
            assert parsed
        
            engine = builder.build_engine(network, config=config)
            return engine

engine = build_engine("bert_256.onnx", fp16=True, int8=True)

with open('bert_256_int8.trt', 'wb') as f:
    f.write(bytearray(engine.serialize()))

how to solve it? thx

Hi, Please refer to the below links to perform inference in INT8
https://github.com/NVIDIA/TensorRT/blob/master/samples/opensource/sampleINT8/README.md

Thanks!