I got this error when trying to calibrate an INT8 TensorRT engine.
My ONNX model converts to a TensorRT engine successfully using onnx2trt,
but during calibration I get the following error:
[TensorRT] INTERNAL ERROR: Assertion failed: d.nbDims >= 1
Has anybody run into a similar issue? Could it be caused by a Gather element in the ONNX model, since one of its inputs may be a scalar value without any shape?
Hi @LucasJin ,
Can you please help us with your model and script so that we can debug the issue.
Thanks!
Same error for me. The model is a slightly modified bert-base (bert_256.onnx.zip - Google Drive); conversion script:
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import pickle
import os
class Int8Calibrator(trt.IInt8MinMaxCalibrator):
    """INT8 min-max calibrator that feeds pre-pickled BERT input batches.

    Loads calibration data from ``10_batches_bs_256.pickle`` — presumably a
    list of ``(input_ids, input_mask, segment_ids)`` numpy arrays (TODO:
    confirm against the script that produced the pickle) — serves one batch
    per ``get_batch`` call, and persists the computed scale table in
    ``int8_calibrator_cache.bin`` so later builds can skip calibration.
    """

    def __init__(self):
        super().__init__()
        self.cache_file = 'int8_calibrator_cache.bin'
        # NOTE(review): unpickling an untrusted file is unsafe; acceptable
        # here only because the calibration set is generated locally.
        with open("10_batches_bs_256.pickle", "rb") as f:
            self.inputs = pickle.load(f)
        self.next_id = 0
        # One device allocation per input name, reused across batches.
        self.gpu_arrays = {}

    def get_batch(self, names):
        """Copy the next calibration batch to the GPU.

        Args:
            names: binding names requested by TensorRT.

        Returns:
            A list of device pointers ordered like ``names``, or ``None``
            once all batches are consumed (tells TensorRT to stop).

        Raises:
            ValueError: if TensorRT asks for an unknown binding name.
        """
        print(f"get batch {names}")
        if self.next_id >= len(self.inputs):
            return None
        input_ids, input_mask, segment_ids = self.inputs[self.next_id]
        self.next_id += 1
        host_tensors = {
            "input_ids": input_ids,
            "segment_ids": segment_ids,
            "input_mask": input_mask,
        }
        result = []
        for input_name in names:
            if input_name not in host_tensors:
                # Original code only printed a warning and then reused a
                # stale (or unbound) tensor — fail loudly instead.
                raise ValueError(f"Wrong input name: {input_name}")
            tensor = host_tensors[input_name]
            if input_name not in self.gpu_arrays:
                self.gpu_arrays[input_name] = cuda.mem_alloc(tensor.nbytes)
            cuda.memcpy_htod(self.gpu_arrays[input_name], tensor)
            result.append(self.gpu_arrays[input_name])
        return result

    def get_batch_size(self):
        """Return the calibration batch size.

        Returns 1 because the network is built with an explicit batch
        dimension, so the batch is already part of each tensor's shape.
        """
        print("get batch size")
        return 1

    def read_calibration_cache(self):
        """Return the cached calibration table bytes, or None if absent."""
        print("read cache")
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        print("read cache done")

    def write_calibration_cache(self, cache):
        """Persist the calibration table produced by TensorRT."""
        print("Write cache")
        with open(self.cache_file, "wb") as f:
            f.write(cache)
def build_engine(model_file, fp16=False, int8=False):
    """Parse an ONNX model and build a TensorRT engine.

    Args:
        model_file: path to the ONNX model file.
        fp16: enable FP16 kernels.
        int8: enable INT8 kernels with min-max calibration.

    Returns:
        The built ``ICudaEngine``, or ``None`` if the build failed.

    Raises:
        RuntimeError: if the ONNX model cannot be parsed.
    """
    print("building engine")
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    config = builder.create_builder_config()
    config.max_workspace_size = 8 * (1024 ** 3)  # 8 GiB
    # Use only the builder-config flag API; the original mixed the
    # deprecated builder.fp16_mode/int8_mode attributes with config flags,
    # left FP16 commented out despite fp16=True, and enabled INT8
    # unconditionally regardless of the `int8` argument.
    if fp16:
        config.set_flag(trt.BuilderFlag.FP16)
    if int8:
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = Int8Calibrator()
    # The ONNX parser requires an explicit-batch network definition.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)
    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                # Surface parser diagnostics instead of a bare assert
                # (asserts are stripped under `python -O`).
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                raise RuntimeError(f"Failed to parse ONNX model: {model_file}")
    return builder.build_engine(network, config=config)
# Build the engine with both FP16 and INT8 enabled (TensorRT chooses the
# fastest available kernel per layer) and serialize it to disk.
engine = build_engine("bert_256.onnx", fp16=True, int8=True)
if engine is None:
    # build_engine returns None on failure; fail with a clear message
    # instead of an opaque AttributeError on engine.serialize().
    raise RuntimeError("Engine build failed; see TensorRT log output above.")
with open('bert_256_int8.trt', 'wb') as f:
    f.write(bytearray(engine.serialize()))
NVES
April 15, 2021, 1:07pm
5