Description
Hi,
I have an ONNX model (file size: 282 MB).
After converting it to a TensorRT engine, the serialized .trt file is 739 MB.
Why is the .trt file so much larger than the ONNX file?
Any suggestions?
Thanks!
Environment
TensorRT Version: v7.1.3.4
GPU Type: 1080Ti
Nvidia Driver Version: 455.45
CUDA Version: 11.0
CUDNN Version: 8.5
Operating System + Version: ubuntu 18.04
Python Version (if applicable): python3.6
PyTorch Version (if applicable): torch1.7
Python code:
import tensorrt as trt
import os
# Logger instance handed to the TensorRT builder and ONNX parser.
TRT_LOGGER = trt.Logger()
# Network-creation bitmask with only the EXPLICIT_BATCH flag set.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
def get_engine(onnx_file_path, engine_file_path=""):
    """Build a TensorRT engine from an ONNX file and optionally save it.

    The engine is always rebuilt from ``onnx_file_path``; an existing file at
    ``engine_file_path`` is overwritten, never loaded.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        engine_file_path: Where to write the serialized engine. When empty
            (the default) the engine is built but not written to disk.

    Returns:
        The built TensorRT engine.

    Raises:
        SystemExit: If ``onnx_file_path`` does not exist (exit status 1).
        RuntimeError: If the ONNX file cannot be parsed or the engine build
            fails.
    """

    def build_engine():
        """Parse the ONNX file, build the engine, and serialize it if requested."""
        # Check for the model before creating any TensorRT objects.
        if not os.path.exists(onnx_file_path):
            print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
            # A missing model is an error, so report a non-zero exit status.
            raise SystemExit(1)

        # The network is created with the EXPLICIT_BATCH flag for the ONNX parser.
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(EXPLICIT_BATCH) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1 GiB
            builder.max_batch_size = 1

            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parsed_ok = parser.parse(model.read())
            if not parsed_ok:
                # Surface the parser's own diagnostics instead of failing
                # later with an unrelated error.
                for index in range(parser.num_errors):
                    print(parser.get_error(index))
                raise RuntimeError(
                    'Failed to parse ONNX file {}'.format(onnx_file_path))

            # Pin the input to a fixed shape before building.
            network.get_input(0).shape = [1, 3, 1024, 1224]
            print('Completed parsing of ONNX file')

            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            if engine is None:
                # build_cuda_engine signals failure by returning None.
                raise RuntimeError(
                    'Failed to build a TensorRT engine from {}'.format(onnx_file_path))
            print("Completed creating Engine")

            # An empty path means "do not save"; open("") would raise.
            if engine_file_path:
                with open(engine_file_path, "wb") as f:
                    f.write(engine.serialize())
            return engine

    return build_engine()
def main():
    """Build the engine for the hard-coded ONNX model and report completion."""
    source_model = "/data3/deeplearning/output/iter_160000.pth.onnx"
    engine_target = "/data3/deeplearning/output/iter_160000.pth.onnx.trt"
    # The serialized engine is written next to the ONNX file.
    get_engine(onnx_file_path=source_model, engine_file_path=engine_target)
    print("FINISH")
# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()