# ---- File: calibrator.py ----
# PreprocessSSD
def PreprocessSSD(img_path, ssd_input_resolution):
    """Load an image from disk and turn it into an SSD network input.

    The image is read with OpenCV, resized to ``ssd_input_resolution``, has a
    fixed per-channel mean subtracted, and is rearranged from HWC to CHW.

    Args:
        img_path: Path of the image file to load.
        ssd_input_resolution: Target size, forwarded unchanged to
            ``cv2.resize`` as its ``dsize`` argument.
            NOTE(review): OpenCV documents ``dsize`` as (width, height);
            confirm the order if a non-square size is ever passed.

    Returns:
        A C-contiguous ``float32`` array in CHW layout.

    Raises:
        IOError: If OpenCV could not read or decode ``img_path``.
        ValueError: If the decoded image does not have exactly 3 channels.
    """
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than crashing later on ``image.shape``.
    if image is None:
        raise IOError("Failed to read image file: {}".format(img_path))
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            "Expected a 3-channel image, got shape {} for {}".format(image.shape, img_path)
        )

    # NOTE(review): cv2.imread decodes to BGR channel order, so these values
    # are applied as (B, G, R) means -- confirm this matches the means the
    # model was trained with.
    channel_means = np.array((104, 117, 123), dtype=np.float32)

    # Resize first (on the decoded image), then convert to float32.
    resized = cv2.resize(
        image, ssd_input_resolution, interpolation=cv2.INTER_LINEAR
    ).astype(np.float32)
    resized -= channel_means

    # HWC -> CHW; the transpose is only a view, so force a contiguous copy.
    chw = resized.transpose((2, 0, 1))
    return np.array(chw, dtype=np.float32, order='C')
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds preprocessed images to TensorRT.

    Image files found in a directory are preprocessed with ``PreprocessSSD``,
    grouped into fixed-size batches, and copied into a single device buffer
    whose address is returned from ``get_batch``.
    """

    def __init__(self, batch_data_dir, cache_file):
        """Collect calibration images and allocate the device input buffer.

        Args:
            batch_data_dir: Directory containing the calibration images.
            cache_file: Path used to read and write the calibration cache.

        Raises:
            ValueError: If ``batch_data_dir`` holds fewer files than one batch.
        """
        # Whenever you specify a custom constructor for a TensorRT class,
        # you MUST call the constructor of the parent explicitly.
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.cache_file = cache_file

        # os.listdir returns entries in arbitrary order and also lists
        # sub-directories; keep regular files only and sort them so that
        # calibration is reproducible between runs.
        candidates = (os.path.join(batch_data_dir, name) for name in os.listdir(batch_data_dir))
        self.batch_files = sorted(path for path in candidates if os.path.isfile(path))

        self.batch_size = 1  # images per calibration batch
        self.batch_round = 50  # maximum number of batches fed to TensorRT

        if len(self.batch_files) < self.batch_size:
            raise ValueError(
                "Need at least {} calibration image(s) in {}, found {}".format(
                    self.batch_size, batch_data_dir, len(self.batch_files)
                )
            )

        # Read one batch up front to learn the network input shape.
        self.shape = self.read_batch_file(self.batch_files[0:self.batch_size]).shape
        print("calibration inference shape :{}".format(self.shape))

        # Each element of the calibration data is a float32.
        self.device_input = cuda.mem_alloc(trt.volume(self.shape) * trt.float32.itemsize)

        # Generator of batches; get_batch() advances it with next().
        self.batches = self._iter_batches()

    def _iter_batches(self):
        """Yield up to ``batch_round`` full batches, stopping when images run out."""
        for round_index in range(self.batch_round):
            start = round_index * self.batch_size
            paths = self.batch_files[start:start + self.batch_size]
            # Without this guard a short (or empty) slice reaches reshape()
            # in read_batch_file and raises ValueError in the middle of
            # calibration, because get_batch only handles StopIteration.
            if len(paths) < self.batch_size:
                return
            print("Calibrating batch {:}, containing {:} images".format(round_index, self.batch_size))
            yield self.read_batch_file(paths)

    def read_batch_file(self, filename):
        """Preprocess the given image paths into one batch array.

        Args:
            filename: Sequence of image paths making up a single batch
                (despite the singular name).

        Returns:
            Array of shape ``(self.batch_size, 3, 300, 300)``.

        Raises:
            ValueError: If ``filename`` is empty, or the number of images
                does not match ``self.batch_size`` (raised by ``reshape``).
        """
        input_resolution_ssd_HW = (300, 300)
        images = [PreprocessSSD(img_path, input_resolution_ssd_HW) for img_path in filename]
        if not images:
            raise ValueError("Cannot build a calibration batch from an empty file list")
        return np.array(images).reshape(self.batch_size, 3, 300, 300)

    def get_batch_size(self):
        """Return the batch dimension of the calibration input shape."""
        return self.shape[0]

    # TensorRT passes along the names of the engine bindings to the get_batch function.
    # You don't necessarily have to use them, but they can be useful to understand the order of
    # the inputs. The bindings list is expected to have the same ordering as 'names'.
    def get_batch(self, names):
        """Copy the next batch to the device and return its buffer address.

        Args:
            names: Binding names supplied by TensorRT; unused here.

        Returns:
            A one-element list holding the device buffer address, or ``None``
            once all batches have been consumed.
        """
        # Keep the try body minimal: only next() signals exhaustion.
        try:
            batch = next(self.batches)
        except StopIteration:
            # When we're out of batches, we return either [] or None.
            # This signals to TensorRT that there is no calibration data remaining.
            return None

        # The host-to-device copy needs a flat, contiguous float32 buffer.
        host_data = np.ascontiguousarray(batch, dtype=np.float32).ravel()
        cuda.memcpy_htod(self.device_input, host_data)
        return [int(self.device_input)]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or ``None`` if no cache file exists."""
        # If there is a cache, use it instead of calibrating again.
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """Write the calibration cache bytes to ``self.cache_file``."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)
# ---- Engine build script ----
def build_int8_engine(onnx_file_path, trt_logger, trt_engine_datatype, batch_size, calib, silent=False):
    """Parse an ONNX file and build an INT8 TensorRT engine from it.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        trt_logger: Logger handed to the TensorRT builder and ONNX parser.
        trt_engine_datatype: Unused; kept so existing callers keep working.
        batch_size: Unused; the builder's max batch size is taken from
            ``calib.get_batch_size()`` instead.
        calib: INT8 calibrator assigned to the builder.
        silent: When true, suppress the "Building an engine" progress message.

    Returns:
        The result of ``builder.build_cuda_engine(network)``, or ``None`` if
        the ONNX parser reports a failure.

    Raises:
        SystemExit: With status 1 if ``onnx_file_path`` does not exist.
    """
    # Check the model path before creating any TensorRT objects. Exit with a
    # non-zero status so a missing model is not reported as success.
    if not os.path.exists(onnx_file_path):
        print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
        raise SystemExit(1)

    # NOTE(review): max_workspace_size / int8_mode / int8_calibrator /
    # build_cuda_engine are builder-level settings; confirm they exist in the
    # TensorRT version this script targets.
    with trt.Builder(trt_logger) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, trt_logger) as parser:
        builder.max_workspace_size = 1 << 30  # 1 GiB of builder workspace
        builder.max_batch_size = calib.get_batch_size()
        # Activate INT8 mode and attach the calibrator.
        builder.int8_mode = True
        builder.int8_calibrator = calib
        print("Builder works on Int8 mode. Max batch size:{:d}, Max work space size:{:d}.".format(builder.max_batch_size, builder.max_workspace_size))

        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print ('ERROR: Failed to parse the ONNX file.')
                for error_index in range(parser.num_errors):
                    print (parser.get_error(error_index))
                return None

        # Pin the first network input to a single 3x300x300 image.
        network.get_input(0).shape = [1, 3, 300, 300]
        print('Completed parsing of ONNX file')

        if not silent:
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        return builder.build_cuda_engine(network)