Description
I followed the official quick start guide to generate the TensorRT engine from the ONNX model.
And I found that the engine generated by setting --fp16:
trtexec --onnx=fcn-resnet101.onnx --fp16 --precisionConstraints --workspace=2048 --minShapes=input:1x3x256x256 --optShapes=input:1x3x1026x1282 --maxShapes=input:1x3x1140x2560 --buildOnly --saveEngine=fcn-resnet101.trt
has the size similar to the one generated without setting this flag:
trtexec --onnx=fcn-resnet101.onnx --precisionConstraints --workspace=2048 --minShapes=input:1x3x256x256 --optShapes=input:1x3x1026x1282 --maxShapes=input:1x3x1140x2560 --buildOnly --saveEngine=fcn-resnet101.trt
Is this normal? Also, is there a Python API to determine whether the engine is FP32 or FP16?
Thanks !
Environment
TensorRT Version: 8.5.1.7
GPU Type: NVIDIA TITAN Xp
Nvidia Driver Version: 470.63.01
CUDA Version: 10.2
CUDNN Version: 8.6.0
Operating System + Version: Ubuntu 16.04.6 LTS (GNU/Linux 4.4.0-142-generic x86_64)
Python Version (if applicable): 3.8.15
TensorFlow Version (if applicable):
PyTorch Version (if applicable): 1.12.1
Baremetal or Container (if container which image + tag):
Relevant Files
# download and save image
from PIL import Image
from io import BytesIO
import requests
import os
output_image = "input.ppm"
# Optional step (currently disabled): fetch the sample image and store it
# in PPM format under `output_image`.
# print("Exporting ppm image {}".format(output_image))
# response = requests.get("https://pytorch.org/assets/images/deeplab1.png")
# with Image.open(BytesIO(response.content)) as img:
#     ppm = Image.new("RGB", img.size, (255, 255, 255))
#     ppm.paste(img, mask=img.split()[3])
#     ppm.save(output_image)
import torch
import torch.nn as nn
# Switch between exporting the model in half precision (True) and
# single precision (False).
FP16 = False

# The ONNX file name records which precision was exported.
output_onnx = 'fcn-resnet101-fp16.onnx' if FP16 else 'fcn-resnet101-fp32.onnx'
class FCN_ResNet101(nn.Module):
    """Wrap the torch.hub ``fcn_resnet101`` model and emit an argmax map."""

    def __init__(self):
        """Load the pretrained ``fcn_resnet101`` model from torch.hub."""
        super().__init__()
        self.model = torch.hub.load('pytorch/vision:v0.6.0', 'fcn_resnet101', pretrained=True)

    def forward(self, inputs):
        """Run the wrapped model and reduce its ``'out'`` entry with argmax.

        The argmax is taken over dimension 1 and that dimension is kept
        (size 1) in the returned tensor.
        """
        scores = self.model(inputs)['out']
        return scores.argmax(1, keepdims=True)
# Export the model to ONNX, then build a TensorRT engine from it with trtexec.
import subprocess  # imported here so this script section is self-contained

# Build the model and a matching dummy input on the GPU; both are cast to
# half precision when FP16 is enabled.
model = FCN_ResNet101().cuda()
input_tensor = torch.rand(4, 3, 224, 224).cuda()
if FP16:
    model = model.half()
    input_tensor = input_tensor.half()
model.eval()

print('exporting ONNX model {}'.format(output_onnx))
torch.onnx.export(
    model,
    input_tensor,
    output_onnx,
    input_names=['input'],
    output_names=['output'],
    # Batch, height and width are exported as dynamic axes.
    dynamic_axes={
        'input': {0: "batch", 2: 'height', 3: "width"},
        'output': {0: 'batch', 2: 'height', 3: 'width'},
    },
    verbose=False,
)
print('exporting finished')

print('converting onnx model to trt model')
# splitext strips only the final extension, so paths that contain other dots
# (e.g. './model.onnx') still produce the intended engine file name.
output_trt = '{}.trt'.format(os.path.splitext(output_onnx)[0])

# Reference invocations:
# trtexec --onnx=fcn-resnet101.onnx --fp16 --workspace=64 --minShapes=input:1x3x256x256 --optShapes=input:1x3x1026x1282 --maxShapes=input:1x3x1440x2560 --buildOnly --saveEngine=fcn-resnet101.engine
# trtexec --onnx=resnet50_pytorch.onnx --saveEngine=resnet_engine_pytorch.trt --explicitBatch --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw --fp16

# One argument list is shared by both precisions; the precision flags are
# only added for the FP16 build.
trtexec_cmd = [
    '/mnt/users/TensorRT-8.5.1.7/bin/trtexec',
    f'--onnx={output_onnx}',
]
if FP16:
    trtexec_cmd += ['--fp16', '--precisionConstraints']
trtexec_cmd += [
    '--workspace=2048',
    '--minShapes=input:1x3x256x256',
    '--optShapes=input:1x3x1026x1282',
    '--maxShapes=input:1x3x1140x2560',
    '--buildOnly',
    f'--saveEngine={output_trt}',
]

# check=True raises CalledProcessError when trtexec exits with a non-zero
# status, so a failed build is not reported as finished.
subprocess.run(trtexec_cmd, check=True)
print('converting finished')
Steps To Reproduce
Run the above code and check the generated engine size (e.g. by using `ls -l`).