Hi @dusty_nv
cv_bridge not being the built correctly for Python 3.6 really was the problem.
Now, I am writing an inference ROS node. I would like to initialize the trt engine and the execution context during the initialization of the node because they take up quite some time and then do inferencing in a callback where I subscribe to an image topic. However somehow this does not really work.
The I am getting the following error message:
File "/catkin_ws/src/arc_bridge_detection/src/bridge_detection_node.py", line 146, in processImages
cuda.memcpy_htod(self.input_binds[0]['allocation'], np.ascontiguousarray(img_data))
pycuda._driver.LogicError: cuMemcpyHtoD failed: invalid device context
So somehow I cannot access the engine/context inside the ROS callback.
Below is my (very simple) code:
#!/usr/bin/env python3
import ctypes
# Load mmdeploy custom plugins for TRT
ctypes.CDLL("/mmdeploy_installs/mmdeploy/build/lib/libmmdeploy_tensorrt_ops.so")
# Inferencing and image processing
import tensorrt as trt
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
# ROS
import rospy
import rospkg
from sensor_msgs.msg import Image
import message_filters
from cv_bridge import CvBridge, CvBridgeError
# General
import numpy as np
import os
from os import listdir
import time
rospy.loginfo("Imports done!")
class BridgeDetectionNode:
def __init__(self, engine_file):
# engine_file: The path to a serialized TensorRT engine to load from disk
### General initializations ######################################################################
self.class_names=['Can', 'Carton/Paper', 'GlassBottle',
'Other', 'PlasticBottle', 'PlasticOther', 'Wrapper']
### TRT Inferencing initializations###############################################################
# Define TRT Log level
self.logger = trt.Logger(trt.Logger.ERROR)
# Initialize libnvinfer plugins
trt.init_libnvinfer_plugins(self.logger, namespace="")
# Load TRT engine
rospy.loginfo("1")
print("Reading engine from file {}".format(engine_file))
with open(engine_file, "rb") as f, trt.Runtime(self.logger) as runtime:
self.engine = runtime.deserialize_cuda_engine(f.read())
rospy.loginfo("2")
# Create execution context
self.context = self.engine.create_execution_context()
rospy.loginfo("3")
# Extract info about input and output bindings of the model
self.input_binds = []
self.output_binds = []
self.allocations = []
for binding in self.engine:
binding_idx = self.engine.get_binding_index(binding)
name = self.engine.get_binding_name(binding_idx) # Bindings correspond to the input and outputs of the network
dtype = self.engine.get_binding_dtype(binding_idx)
shape = self.engine.get_binding_shape(binding_idx)
size = np.dtype(trt.nptype(dtype)).itemsize
for s in shape:
size *= s
allocation = cuda.mem_alloc(size)
bind_description = {
'index': binding_idx,
'name': name,
'dtype': np.dtype(trt.nptype(dtype)),
'shape': list(shape),
'allocation': allocation,
}
self.allocations.append(allocation)
if self.engine.binding_is_input(binding):
self.input_binds.append(bind_description)
else:
self.output_binds.append(bind_description)
# Specify shape and size of output buffer
self.output_spec = []
for o in self.output_binds:
self.output_spec.append((o['shape'], o['dtype']))
# Create output buffer
self.outputs = []
for shape, dtype in self.output_spec:
self.outputs.append(np.zeros(shape, dtype))
rospy.loginfo("Initialized TRT for Inferencing")
### ROS initializations #######################################################################
self.bridge = CvBridge()
self.sub_rgb = rospy.Subscriber("/image_broker/camera/image_raw",Image, self.processImages)
rospy.loginfo("Initialized ROS")
### Functions ####
def processImages(self, img_msg):
# Convert image_msg to cv image
try:
cv_img = self.bridge.imgmsg_to_cv2(img_msg, desired_encoding="bgra8")
except CvBridgeError as e:
print(e)
# preprocess the image for inferencing
self.img_data = self.preprocess(cv_img)
rospy.loginfo(self.img_data)
cuda.memcpy_htod(self.input_binds[0]['allocation'], np.ascontiguousarray(img_data))
self.context.execute_v2(self.allocations)
for o in range(len(self.outputs)):
cuda.memcpy_dtoh(self.outputs[o], self.output_binds[o]['allocation'])
def preprocess(self, image):
# Mean normalization
mean = np.array([0.485, 0.456, 0.406]).astype('float32')
stddev = np.array([0.229, 0.224, 0.225]).astype('float32')
image = np.asarray(image).astype('float32')
# The Camera has an alpha channel (rgba), let's leave it away (-->rgb)
image = image[:,:,:3]
image = (image / float(255.0) - mean) / stddev
# Switch from HWC to to CHW order
image = np.moveaxis(image, 2, 0)
image = image[np.newaxis,...]
return image
if __name__ == '__main__':
rospy.init_node('bridge_detection', anonymous=False)
engine_file = "/mmdeploy_installs/mmdeploy/exports/tensorrt_22_05_17_fp16_static_480x640/end2end.engine"
bridge_detection_node = BridgeDetectionNode(engine_file)
rospy.spin()
I am struggling to find a solution to this… could you maybe advise me on how to overcome this issue. What is the best practice to declare the engine, and execution context using ROS?
Thanks a lot!