@NVES
Thanks for your reply. ONNX model: https://drive.google.com/file/d/1JVUiIBysRZjAA0a9lK9vKjHWG88d-qls/view?usp=sharing — onnx.checker.check_model
returns no errors.
The model was trained in pytorch with [0,1] input normalised with mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225].
My env:
'JETSON_JETPACK': '4.4.1'
'JETSON_L4T_REVISION': '4.4'
'JETSON_TENSORRT': '7.1.3.0'
'JETSON_CUDNN': '8.0.0.180'
'JETSON_CUDA': '10.2.89'
Calibration scripts (first dataset.py, then the calibrator that imports it):
import os
import glob
import yaml
import numpy as np
from PIL import Image
IMG_SUFFIX = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
def get_img_arr(img_path, img_size, mean=None, std=None):
if any([mean is not None, std is not None]):
assert all([mean is not None, std is not None]), 'Provide both mean and std or neither'
img_pil = Image.open(img_path).resize((img_size[1],img_size[0]), resample=Image.BILINEAR) # swap H,W to W,H for PIL
img_arr = (np.array(img_pil)/255).astype(np.float32).transpose(2,0,1)
if mean is not None and std is not None:
img_arr = (img_arr - mean)/std
img_arr = np.ascontiguousarray(np.expand_dims(img_arr,0))
# img_arr = np.expand_dims(img_arr,0)
return img_arr
class Dataset:
def __init__(self, folder, img_size, batch_size, norm):
images = []
for suffix in IMG_SUFFIX:
images.extend(glob.glob('{}/**/*{}'.format(folder, suffix)))
self.images = images
norm_cfg = os.path.join(folder, 'norm.yaml')
self.norm = os.path.exists(norm_cfg) and norm
self.mean = None
self.std = None
if self.norm:
with open(norm_cfg) as f:
cfg = yaml.safe_load(f)
self.mean = np.array(cfg['mean']).reshape(-1,1,1)
self.std = np.array(cfg['std']).reshape(-1,1,1)
self.img_max_nbytes = max([get_img_arr(img, img_size, mean=self.mean, std=self.std).nbytes for img in self.images])
self.img_size = img_size
def __iter__(self):
return iter([get_img_arr(img, self.img_size, mean=self.mean, std=self.std) for img in self.images])
import os

import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

from dataset import Dataset
class Calibrator(trt.IInt8EntropyCalibrator2):
def __init__(self, data_folder, img_size, cache_file, cache_exist=False, batch_size=1, norm=False):
trt.IInt8EntropyCalibrator2.__init__(self)
self.cache_file = cache_file
self.cache_exist = cache_exist
self.batch_size = batch_size
dataset = Dataset(data_folder, img_size, batch_size, norm=norm)
self.batches = iter(dataset)
self.device_input = cuda.mem_alloc(dataset.img_max_nbytes * self.batch_size)
def get_batch_size(self):
return self.batch_size
def get_batch(self, names):
try:
# Assume self.batches is a generator that provides batch data.
data = next(self.batches) # for calibration we take just img array
print('IMG', data.shape, data.min(), data.max())
# Assume that self.device_input is a device buffer allocated by the constructor.
cuda.memcpy_htod(self.device_input, data)
return [int(self.device_input)]
except StopIteration:
# When we're out of batches, we return either [] or None.
# This signals to TensorRT that there is no calibration data remaining.
return None
def read_calibration_cache(self):
# If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None.
if self.cache_exist:
print('READ FROM EXISTING CALIBRATION CACHE')
# if os.path.exists(self.cache_file):
with open(self.cache_file, "rb") as f:
return f.read()
else:
print('NO EXISTING CALIBRATION CACHE. PERFORM CALIBRATION')
def write_calibration_cache(self, cache):
with open(self.cache_file, "wb") as f:
f.write(cache)
Ok, now two cases.
Case 1. Calibration without normalisation. Cache - classifier_int8_noqat_unnorm.cache (2.6 KB). Log - log_classifier_int8_noqat_unnorm.txt (1.8 MB).
As you can see from the log during calibration the engine sees the input in the range [0,1], which is not normalised. If we look further down the log then scales and activations ranges make sense. But what is interesting is the input range
which is [-1.00393,1.00393] while the engine sees inputs in the range [0,1]. Is it doing some normalisation by default?
Case 2. Calibration with normalisation. Using mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]. Cache - classifier_int8_noqat_norm.cache (2.6 KB). Log - log_classifier_int8_noqat_norm.txt (1.8 MB).
We can see from the log that during calibration the engine sees the normalised input in the range [-value, +value]. But then the actual scales and ranges do not make any sense, with most of them set to [-inf, inf]. What is strange is the input range
[-2.04692e+38,2.04692e+38].