Hi,
I’m trying to run TensorRT inference with Python multiprocessing. Whenever context.execute_async is called, I get the error
Error Code 1: Cudnn (CUDNN_STATUS_MAPPING_ERROR)
which gets printed repeatedly on the screen. This is on my Xavier NX with the following versions:
python: 3.8.12
cuda: 11.4
cudnn: 8.6.0
TensorRT: 8.5.3.2
Jetpack: 5.1.3
I have looked in several places and incorporated the suggestions I found, such as defining a custom CUDA context per process (that is what the make_context()/push()/pop() calls in the skeleton below are doing).
The engine loads successfully, and the error does not occur outside a multi-processing/multi-threading scenario (normal single-process inference works fine).
Although I can’t share the engine file or the actual code, my code skeleton looks like this:
import numpy as np
import pycuda.driver as cuda
import tensorrt as trt


class TensorRTModel:
    def __init__(self, engine_path: str, batch_size: int = 1):
        cuda.init()
        self.engine_path = engine_path
        self.batch_size = batch_size
        trt_logger = trt.Logger(trt.Logger.INFO)

        # Load engine
        self.engine, self.context = self.load_engine(
            engine_path=engine_path,
            logger=trt_logger,
        )

        device = cuda.Device(0)
        self.cuda_context = device.make_context()
        self.stream = cuda.Stream()

        # inputs and outputs are classes each with variables for host_memory and device_memory
        self.inputs, self.outputs, self.bindings = self.allocate_buffers()

    def predict(self, x):
        np.copyto(self.inputs[0].host_memory, x.ravel())
        self.cuda_context.push()

        for input in self.inputs:
            cuda.memcpy_htod_async(
                input.device_memory,
                input.host_memory,
                self.stream
            )

        # Run inference
        # This is where the error originates from
        self.context.execute_async(
            bindings=self.bindings,
            stream_handle=self.stream.handle
        )  # tried execute_async_v2 too

        # Transfer prediction output from the GPU.
        for output in self.outputs:
            cuda.memcpy_dtoh_async(
                output.host_memory,
                output.device_memory,
                self.stream
            )

        # Synchronize the stream
        self.stream.synchronize()
        self.cuda_context.pop()

    def __del__(self):
        self.cuda_context.detach()
        del self.cuda_context
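For context, load_engine and allocate_buffers are the usual TensorRT/pycuda boilerplate. Simplified versions (not my exact code; the HostDeviceBuffer holder name is made up just for this sketch) look roughly like this:

from collections import namedtuple

# Simple holder for a host/device buffer pair (name is only for this sketch)
HostDeviceBuffer = namedtuple('HostDeviceBuffer', ['host_memory', 'device_memory'])

def load_engine(self, engine_path, logger):
    # Deserialize the engine file and create an execution context
    with open(engine_path, 'rb') as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    return engine, context

def allocate_buffers(self):
    # One page-locked host buffer and one device buffer per binding
    inputs, outputs, bindings = [], [], []
    for binding in self.engine:
        size = trt.volume(self.engine.get_binding_shape(binding))
        dtype = trt.nptype(self.engine.get_binding_dtype(binding))
        host_memory = cuda.pagelocked_empty(size, dtype)
        device_memory = cuda.mem_alloc(host_memory.nbytes)
        bindings.append(int(device_memory))
        buffer = HostDeviceBuffer(host_memory, device_memory)
        if self.engine.binding_is_input(binding):
            inputs.append(buffer)
        else:
            outputs.append(buffer)
    return inputs, outputs, bindings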
The main file that drives the multi-process inference looks something like this:
import argparse
from time import time

import multiprocessing
import numpy as np

from tensorrt_model import TensorRTModel


def benchmark(
    model_path: str,
    tag: str,
    return_dict: dict,
    batch_size: int = 1,
    num_tests: int = 100,
    warm_up: int = 10
):
    model_fn = TensorRTModel(engine_path=model_path, batch_size=batch_size)
    inp = np.random.rand(1, 640, 640, 3).astype(np.float32)
    time_list = list()

    for _ in range(warm_up):
        model_fn.predict(inp)

    for _ in range(num_tests):
        t_start = time()
        model_fn.predict(inp)
        t_end = time()
        t_elapsed = t_end - t_start
        time_list.append(t_elapsed)

    del model_fn
    avg_time = np.mean(time_list)
    return_dict[tag] = avg_time


def main():
    try:
        multiprocessing.set_start_method('spawn', force=True)
        print("Multiprocessing start method: SPAWN")
    except RuntimeError:
        pass

    parser = argparse.ArgumentParser(
        'multi-process benchmarking for TensorRT models',
        description='This script benchmarks TensorRT models in a multi-process fashion'
    )
    parser.add_argument(
        '-n',
        '--num-process',
        dest='num_parallel_processes',
        type=int,
        default=1,
        help='Number of parallel processes to run benchmarking on'
    )
    parser.add_argument(
        '-m',
        '--model-path',
        dest='model_path',
        type=str,
        default='model.engine',
        help='Path to the model / TensorRT engine'
    )
    parser.add_argument(
        '-b',
        '--batch-size',
        dest='batch_size',
        type=int,
        default=1,
        help='Batch size'
    )
    args = parser.parse_args()

    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    processes = list()

    for idx in range(args.num_parallel_processes):
        p = multiprocessing.Process(
            target=benchmark,
            args=(args.model_path, f"process-{idx}", return_dict, args.batch_size),
        )
        processes.append(p)
        p.start()

    for p in processes:
        p.join()

    print(return_dict)


if __name__ == '__main__':
    main()
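For reference, I invoke the driver roughly like this (the script name and engine path are placeholders):

python3 benchmark_main.py --num-process 2 --model-path model.engine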
Any help in solving the aforementioned error is appreciated. Thanks in advance…