Description
I get this error while trying to convert an ONNX model to a TensorRT engine. The original model is a Table Transformer PyTorch model that I exported to ONNX.
Initially I exported the PyTorch model to ONNX without dynamic axes, and the conversion to a TensorRT engine worked fine. After adding the dynamic_axes parameter to the export, I get the error below.
Environment
TensorRT Version: 10.1.0
GPU Type: Nvidia T4
Nvidia Driver Version: 535
CUDA Version: 12.2
CUDNN Version: 8.9.7
Operating System + Version: Ubuntu 22.04
Python Version (if applicable): 3.10.12
PyTorch Version (if applicable): 2.1.0+cu121
Baremetal or Container (if container which image + tag):
PyTorch model to ONNX conversion
import torch
import sys
import os


class NomNom:
    def __init__(self, torch_model):
        self.torch_model = torch_model.eval()

    def tatr_onnx(self):
        dummy = self.tatr_inputs()
        dynamic_axes = {
            "samples": {1: "width", 2: "height"}
        }
        torch.onnx.export(self.torch_model,
                          dummy,
                          "TATR_v3.onnx",
                          export_params=True,
                          input_names=['samples'],
                          output_names=['out'],
                          dynamic_axes=dynamic_axes
                          )
        # export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
        # torch.onnx.dynamo_export(self.torch_model, dummy, export_options=export_options).save("TATR.onnx")

    def tatr_inputs(self, W=800, H=1000):
        tensor = torch.randn(3, W, H)
        return [tensor]


def change_cwd():
    abspath = os.path.abspath(__file__)
    dname = os.path.dirname(abspath)
    os.chdir(dname)


def TATR_optimizer():
    sys.path.append("src")
    from src.model_loader import model
    torch_model = model()
    change_cwd()
    trial = NomNom(torch_model)
    trial.tatr_onnx()
    # onnx_model = onnx.load("TATR.onnx")
    # onnx.checker.check_model(onnx_model)


if __name__ == "__main__":
    TATR_optimizer()
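For reference, a small sanity check I can run on the exported file (a sketch, assuming the onnx package is installed and the "TATR_v3.onnx" file name used above) to confirm the width/height dims are actually recorded as dynamic before the TensorRT step:

import onnx

# Load and validate the exported graph, then print the symbolic/static dims
# of every input. For dynamic dims, dim_param holds the axis name; otherwise
# dim_value holds the fixed size.
onnx_model = onnx.load("TATR_v3.onnx")
onnx.checker.check_model(onnx_model)
for inp in onnx_model.graph.input:
    dims = [d.dim_param or d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)
# should print something like: samples [3, 'width', 'height']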
ONNX to TensorRT conversion
import tensorrt as trt

import common  # helper module from the TensorRT Python samples (provides common.GiB)


def build_engine_onnx(model_file):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(0)
    config = builder.create_builder_config()

    # Optimization profile covering the dynamic width/height range of "samples"
    profile = builder.create_optimization_profile()
    profile.set_shape("samples", (3, 80, 80), (3, 800, 1000), (3, 1000, 1000))
    config.add_optimization_profile(profile)

    parser = trt.OnnxParser(network, TRT_LOGGER)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))

    # Load the ONNX model and parse it in order to populate the TensorRT network.
    with open(model_file, "rb") as model:
        if not parser.parse(model.read()):
            print("ERROR: Failed to parse the ONNX file.")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None

    engine_bytes = builder.build_serialized_network(network, config)
    runtime = trt.Runtime(TRT_LOGGER)
    return runtime.deserialize_cuda_engine(engine_bytes)
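For completeness: the TypeError at the end of the traceback below is a secondary effect, since build_serialized_network returns None when the build fails and that None is then passed to deserialize_cuda_engine. Guarding against that (a minimal sketch, not a fix for the underlying Myelin error) makes the real build failure the error that surfaces:

    engine_bytes = builder.build_serialized_network(network, config)
    if engine_bytes is None:
        # The build failed; the [TRT] error messages above are the root cause.
        raise RuntimeError("TensorRT engine build failed, see builder errors above")
    runtime = trt.Runtime(TRT_LOGGER)
    return runtime.deserialize_cuda_engine(engine_bytes)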
Output
[09/13/2024-09:50:09] [TRT] [E] Error Code: 9: Skipping tactic 0x0000000000000000 due to exception No Myelin Error exists
[09/13/2024-09:50:09] [TRT] [E] Error Code: 10: Could not find any implementation for node {ForeignNode[/Cast_10.../backbone/backbone.0/Cast]}.
[09/13/2024-09:50:09] [TRT] [E] IBuilder::buildSerializedNetwork: Error Code 10: Internal Error (Could not find any implementation for node {ForeignNode[/Cast_10.../backbone/backbone.0/Cast]}.)
Traceback (most recent call last):
File "/home/smartops/Desktop/table-transformer-trt/TRTNvidia.py", line 168, in <module>
trt_inference_wrapper = TRTInference(trt_engine_path,
File "/home/smartops/Desktop/table-transformer-trt/TRTNvidia.py", line 23, in __init__
engine = self.build_engine_onnx(trt_engine_path)
File "/home/smartops/Desktop/table-transformer-trt/TRTNvidia.py", line 91, in build_engine_onnx
return runtime.deserialize_cuda_engine(engine_bytes)
TypeError: deserialize_cuda_engine(): incompatible function arguments. The following argument types are supported:
1. (self: tensorrt.tensorrt.Runtime, serialized_engine: buffer) -> tensorrt.tensorrt.ICudaEngine
2. (self: tensorrt.tensorrt.Runtime, stream_reader: tensorrt.tensorrt.IStreamReader) -> tensorrt.tensorrt.ICudaEngine
Invoked with: <tensorrt.tensorrt.Runtime object at 0x7160cc5970f0>, None
-------------------------------------------------------------------
PyCUDA ERROR: The context stack was not empty upon module cleanup.
-------------------------------------------------------------------
A context was still active when the context stack was being
cleaned up. At this point in our execution, CUDA may already
have been deinitialized, so there is no way we can finish
cleanly. The program will be aborted now.
Use Context.pop() to avoid this problem.
-------------------------------------------------------------------
Aborted (core dumped)