Hi,
Following the Python autonomous vehicles sample above on TRT5.1.3.6 cuda10.1 cudnn7 ubuntu18.04 ppc64 T4 16GB, the trt.infer.EntropyCalibrator namespace is not found, the available base calibrators are trt.IInt8EntropyCalibrator and trt.IInt8EntropyCalibrator2 and IInt8LegacyCalibrator. Using either of the entropy ones for Caffe SSD for INT8, parser returns empty model tensors. Using them for Caffe FRCNN for INT8, parser succeeds and calibrator is able to call ImageBatchStream multiple times, but eventually errors out in builder.build_cuda_engine
RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details)
Both of these models successfully build for FP32 and FP16 which don’t require calibrator. Here is the code for calibrator and batch image stream. Is there some additional debugging tracing flag that would point to the root cause of the problems for INT8?
############################################################
# class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator2):
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
def __init__(self, input_layers, stream):
# trt.IInt8EntropyCalibrator2.__init__(self)
trt.IInt8EntropyCalibrator.__init__(self)
self.input_layers = input_layers
self.stream = stream
self.d_input = cuda.mem_alloc(self.stream.calibration_data.nbytes)
stream.reset()
def get_batch_size(self):
return self.stream.batch_size
# def get_batch(self, bindings, names):
def get_batch(self, names, unicode=None):
batch = self.stream.next_batch()
if not batch.size:
return None
cuda.memcpy_htod(self.d_input, batch)
for i in self.input_layers[0]:
assert names[0] != i
bindings = int(self.d_input)
return bindings
def read_calibration_cache(self):
return None
def write_calibration_cache(self, cache):
import ctypes
cache = ctypes.c_char_p(int(cache))
with open('calibration_cache.bin', 'wb') as f:
f.write(cache.value)
return None
########################
class ImageBatchStream():
def __init__(self, batch_size, calibration_files, preprocessor=None):
self.batch_size = batch_size
self.max_batches = (len(calibration_files) // batch_size) + (1 if (len(calibration_files) % batch_size) else 0)
self.files = calibration_files
self.calibration_data = np.zeros((batch_size, 3, 600, 1000), dtype=np.float32)
self.batch = 0
self.preprocessor = preprocessor
@staticmethod
def read_image_chw(path):
from fast_rcnn.config import cfg
import cv2
im = cv2.imread(path)
im = cv2.resize(im, dsize=(1000, 600), interpolation=cv2.INTER_LINEAR)
im = im.astype(np.float32, copy=True)
im = cv2.subtract(im, (cfg.PIXEL_MEANS[0][0][0], cfg.PIXEL_MEANS[0][0][1], cfg.PIXEL_MEANS[0][0][2], 0))
im = im.transpose((2, 0, 1))
return im
def reset(self):
self.batch = 0
def next_batch(self):
if self.batch < self.max_batches:
imgs = []
files_for_batch = self.files[self.batch_size * self.batch: self.batch_size * (self.batch + 1)]
for f in files_for_batch:
print("[ImageBatchStream] Processing ", f)
img = ImageBatchStream.read_image_chw(f)
# img = self.preprocessor(img)
imgs.append(img)
for i in range(len(imgs)):
self.calibration_data[i] = imgs[i]
self.batch += 1
ret = np.ascontiguousarray(self.calibration_data, dtype=np.float32)
return ret
else:
return np.array([])
############################################################
def build_engine(trt_deploy_path, trt_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
builder.max_workspace_size = 1 << 30
if trt_engine_datatype == trt.DataType.HALF:
builder.fp16_mode = True
elif trt_engine_datatype == trt.DataType.INT8 and builder.platform_has_fast_int8:
builder.int8_mode = True
builder.max_batch_size = batch_size
from os import listdir
from os.path import isdir, isfile, join
calibration_files = []
im = '/tmp/files'
if isdir(im):
calibration_files.extend([join(im, f) for f in listdir(im) if isfile(join(im, f))])
else:
calibration_files.append(im)
batchstream = ImageBatchStream(batch_size, calibration_files)
int8_calibrator = PythonEntropyCalibrator(["data"], batchstream)
builder.int8_calibrator = int8_calibrator
model_tensors = parser.parse(trt_deploy_path, trt_model_path, network, trt_engine_datatype)
network.mark_output(model_tensors.find('bbox_pred'))
network.mark_output(model_tensors.find('cls_prob'))
network.mark_output(model_tensors.find('rois'))
if not silent:
print("Building TensorRT engine. This may take few minutes.")
return builder.build_cuda_engine(network)
# RuntimeError: Unable to cast Python instance to C++ type (compile in debug mode for details
)