Description
I’m trying to build FP32, FP16, INT8 optimised model for resnet50 onnx converted model. FP32 and FP16 is working fine.
INT8 optimisation is not working, no cache file is generated. I have followed steps given in int8_sample
Kindly help to build optimised engine file in INT8 mode
Environment
TensorRT Version: 8.2.1.8-1+cuda10.2
GPU Type: Jetson nano
Nvidia Driver Version: CUDA Driver Version: 10.2
CUDA Version: cuda-toolkit-10-2 (= 10.2.460-1)
CUDNN Version: cuDNN Version: 8.2
Operating System + Version: Ubuntu 18.04(l4t with jetpack)
Python Version (if applicable): 3.6.9
TensorFlow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if container which image + tag):
Relevant Files
----main file----
def main():
# initialize TensorRT engine and parse ONNX model
print('******************************')
print('Started building engine...')
cache_file = 'INT8/resnet50_int8_calibration.cache'
#using 100 sample images randomly downloaded from imagenet dataset
training_set = 'imagenet/imagenet_images/'
img_per_batch = 5
Int8_calibrator = Int8Calibrator(training_set, cache_file=cache_file, batch_size=img_per_batch)
engine = build_engine(ONNX_FILE_PATH,Int8_calibrator)
----builder config----
config = builder.create_builder_config()
#config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
config.profiling_verbosity = trt.ProfilingVerbosity.DETAILED
# Calibration cofig
if builder.platform_has_fast_int8:
print('Yes! Continuing in INT8 mode')
config.set_flag(trt.BuilderFlag.INT8)
config.int8_calibrator = Int8_calibrator
else:
exit
-----custom calibration file----
import tensorrt as trt
import os
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import Image
import numpy as np
def preprocess_image_here(input_image_path):
image = Image.open(input_image_path)
h, w = (224,224)
image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS))
image_arr = image_arr.reshape(3,h,w)
# This particular model requires some preprocessing, specifically, mean normalization.
input_img = (image_arr / 255.0 - 0.45) / 0.225
return input_img
class Int8Calibrator(trt.IInt8EntropyCalibrator2):
def __init__(self, training_data, cache_file, batch_size):
# Whenever you specify a custom constructor for a TensorRT class,
# you MUST call the constructor of the parent explicitly.
trt.IInt8EntropyCalibrator2.__init__(self)
self.cache_file = cache_file
# Every time get_batch is called, the next batch of size batch_size will be copied to the device and returned.
#oPreprocessObj = preprocess_obj_loc()
self.data = []
for root, dir, files in os.walk(training_data):
for file in files:
img = os.path.join(root, file)
#print(img)
pre_process_img = preprocess_image_here(img)
self.data.append(pre_process_img)
self.data = np.array(self.data)
print('Inside the calibrator...')
self.batch_size = batch_size
self.current_index = 0
# Allocate enough memory for a whole batch.
self.device_input = cuda.mem_alloc(self.data[0].nbytes * self.batch_size)
def get_batch_size(self):
return self.batch_size
# TensorRT passes along the names of the engine bindings to the get_batch function.
# You don't necessarily have to use them, but they can be useful to understand the order of
# the inputs. The bindings list is expected to have the same ordering as 'names'.
def get_batch(self, names):
if self.current_index + self.batch_size > self.data.shape[0]:
print('\tinsise get_batch cond 1')
return None
current_batch = int(self.current_index / self.batch_size)
if current_batch % self.batch_size == 0:
print("Calibrating batch {:}, containing {:} images".format(current_batch, self.batch_size))
batch = self.data[self.current_index:self.current_index + self.batch_size].ravel()
cuda.memcpy_htod(self.device_input, batch)
self.current_index += self.batch_size
print('\tinsise get_batch')
return [self.device_input]
def read_calibration_cache(self):
print('\t inside read_calib_cache')
# If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
if os.path.exists(self.cache_file):
with open(self.cache_file, "rb") as f:
return f.read()
def write_calibration_cache(self, cache):
print('\t inside write_calib_cache')
with open(self.cache_file, "wb") as f:
f.write(cache)
Steps To Reproduce
Please include:
-
No error in building
-
Not building in INT8 mode. it is building in default mode FP32