Environment
TensorRT Version: 7.1.3
GPU Type: Jetson Nano
CUDA Version: 10.2
Python Version (if applicable): 3.6
TensorFlow Version (if applicable): 1.15.4
Steps To Reproduce
After converting our models from ONNX to TensorRT engines, I ran them on a Jetson Nano. However, the Jetson Nano has limited memory, so I set config.max_workspace_size = 128 * 1024 * 1024. Even so, a single model (< 20 MB on disk) uses almost 1.4 GB of memory at runtime.
How can I reduce memory usage?
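For reference, inference on the Nano runs roughly as below when the ~1.4 GB usage shows up. This is a minimal sketch assuming batch size 1, static shapes, and a single input/output binding; load_engine, infer_once, and the pycuda buffer handling are illustrative, not the exact production code.

import numpy as np
import pycuda.autoinit  # noqa: F401 -- importing this creates the CUDA context
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def load_engine(engine_path: str) -> trt.ICudaEngine:
    # Deserialize a prebuilt plan instead of rebuilding on every start.
    with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def infer_once(engine: trt.ICudaEngine, image: np.ndarray) -> np.ndarray:
    with engine.create_execution_context() as context:
        bindings, host_out, dev_in, dev_out = [], None, None, None
        for i in range(engine.num_bindings):
            size = trt.volume(engine.get_binding_shape(i))
            dtype = trt.nptype(engine.get_binding_dtype(i))
            dev_mem = cuda.mem_alloc(size * np.dtype(dtype).itemsize)
            bindings.append(int(dev_mem))
            if engine.binding_is_input(i):
                dev_in = dev_mem
            else:
                dev_out = dev_mem
                host_out = np.empty(size, dtype=dtype)
        # image must already match the input binding's shape and dtype.
        cuda.memcpy_htod(dev_in, np.ascontiguousarray(image))
        context.execute_v2(bindings)
        cuda.memcpy_dtoh(host_out, dev_out)
        return host_out

The build script: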
import logging
import sys
from typing import Union

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

def _build_engine_onnx(input_onnx: Union[str, bytes], force_fp16: bool = False, max_batch_size: int = 1):
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        if force_fp16:
            logging.info('Building TensorRT engine with FP16 support.')
            if not builder.platform_has_fast_fp16:
                logging.warning('Builder reports no fast FP16 support. Performance drop expected.')
            config.set_flag(trt.BuilderFlag.FP16)
            config.set_flag(trt.BuilderFlag.STRICT_TYPES)

        # Cap the scratch workspace the builder may use when picking layer tactics.
        config.max_workspace_size = 128 * 1024 * 1024

        # Accept either a path to an .onnx file or an already-serialized blob.
        if isinstance(input_onnx, str):
            with open(input_onnx, 'rb') as f:
                input_onnx = f.read()

        if not parser.parse(input_onnx):
            print('ERROR: Failed to parse the ONNX')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)

        if max_batch_size != 1:
            logging.warning('Batch size != 1 is used. Ensure your inference code supports it.')

        # Build an optimization profile from the input name and spatial size.
        profile = builder.create_optimization_profile()
        inp = network.get_input(0)
        im_size = tuple(inp.shape)[2:]  # inp.shape is a Dims object; convert before slicing
        profile.set_shape(inp.name, (1, 3) + im_size, (1, 3) + im_size,
                          (max_batch_size, 3) + im_size)
        config.add_optimization_profile(profile)

        return builder.build_engine(network, config=config)
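For completeness, a hypothetical way to call the function and serialize the result to disk, so the Nano can deserialize a prebuilt plan at startup instead of rebuilding it ('model.onnx' and 'model.plan' are placeholder names):

if __name__ == '__main__':
    engine = _build_engine_onnx('model.onnx', force_fp16=True)
    if engine is None:
        sys.exit('TensorRT engine build failed')
    # engine.serialize() returns a plan that trt.Runtime can deserialize later.
    with open('model.plan', 'wb') as f:
        f.write(engine.serialize())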