How to run the deepstream model locally in jetson?

Koushik-2K · July 6, 2024, 7:29am

Platform and Versions: using Xavier NX with DeepStream 6.3, TensorRT 8.5.2.2, and CUDA 11.4.
Objective: Run a DeepStream application locally on the Jetson without using a pipeline or probe function.
**Requirements:**
I would like to run the LPD model from the DeepStream app locally on the Jetson, without using a pipeline or any probe function. My aim is to load images from a local folder, run inference to get the model’s detections, and save the resulting images into a folder.

I tried to run the LPD TensorRT engine with the code below, but I get the errors “Failed to deserialize the engine” and “Failed to load the TensorRT engine”. I have verified that the path and the file format are correct, but the model itself still fails to load.

class HostDeviceMem:
    """Pair a host buffer with the device buffer that mirrors it.

    The class header was missing from the pasted snippet even though
    ``HostDeviceMem(host_mem, device_mem)`` is instantiated by
    ``TrtModel.allocate_buffers``; it is restored here.

    Attributes:
        host: Host-side buffer.
        device: Device-side allocation paired with ``host``.
    """

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return f"Host:\n{self.host}\nDevice:\n{self.device}"

    def __repr__(self):
        # Same text as str() so the pair is readable inside containers.
        return self.__str__()

class TrtModel:
    """Thin wrapper that loads a serialized TensorRT engine and runs inference.

    Relies on ``trt`` (tensorrt), ``cuda`` (presumably ``pycuda.driver`` with
    an active CUDA context -- confirm against the file's imports), ``np`` and
    ``os`` being available at module level.

    The pasted original had lost the double underscores of ``__init__`` and
    ``__call__`` as well as the ``[]`` list literals; they are restored so
    that ``TrtModel(path)`` and ``model(image, batch_size)`` work.
    """

    def __init__(self, engine_path, max_batch_size=1, dtype=np.float32):
        """Deserialize the engine, allocate I/O buffers and create a context.

        Args:
            engine_path: Path to the serialized TensorRT engine file.
            max_batch_size: Multiplier applied to each binding's volume when
                sizing the buffers.
            dtype: NumPy dtype used for every host buffer and for the input.

        Raises:
            FileNotFoundError: If ``engine_path`` does not exist.
            RuntimeError: If the engine cannot be deserialized/loaded.
        """
        self.engine_path = engine_path
        self.dtype = dtype
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.runtime = trt.Runtime(self.logger)
        self.engine = self.load_engine(self.runtime, self.engine_path)
        if self.engine is None:
            raise RuntimeError("Failed to load the TensorRT engine.")
        self.max_batch_size = max_batch_size
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers()
        self.context = self.engine.create_execution_context()

    @staticmethod
    def load_engine(trt_runtime, engine_path):
        """Read ``engine_path`` from disk and deserialize it.

        Args:
            trt_runtime: A ``trt.Runtime`` instance.
            engine_path: Path to the serialized engine file.

        Returns:
            The deserialized engine.

        Raises:
            FileNotFoundError: If the file does not exist.
            RuntimeError: If deserialization returns ``None``.
        """
        if not os.path.exists(engine_path):
            raise FileNotFoundError(f"Engine file not found at {engine_path}")
        # Plugins must be registered before deserialization so that engines
        # containing plugin layers can be resolved.
        trt.init_libnvinfer_plugins(None, "")
        with open(engine_path, "rb") as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        if engine is None:
            # NOTE(review): a None result usually means the engine was built
            # with a different TensorRT version / GPU, or needs a custom
            # plugin library that is not loaded -- confirm for this engine.
            raise RuntimeError("Failed to deserialize the CUDA engine.")
        return engine

    def allocate_buffers(self):
        """Allocate one page-locked host buffer and one device buffer per binding.

        Returns:
            Tuple ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
            ``outputs`` are lists of ``HostDeviceMem``, ``bindings`` is the
            list of device pointers as ints, and ``stream`` is a CUDA stream.
        """
        inputs, outputs, bindings = [], [], []
        stream = cuda.Stream()
        for binding in self.engine:
            # NOTE(review): every binding is sized with self.dtype; bindings
            # whose real dtype differs (e.g. integer outputs) would be
            # misinterpreted -- confirm against the engine's bindings.
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.max_batch_size
            host_mem = cuda.pagelocked_empty(size, self.dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def __call__(self, x: np.ndarray, batch_size=1):
        """Run inference on ``x`` and return the raw outputs.

        Args:
            x: Input array; it is cast to ``self.dtype`` and flattened into
                the first input buffer.
            batch_size: Batch size passed to the execution context and used
                to reshape the outputs.

        Returns:
            A list with one array per output binding, each reshaped to
            ``(batch_size, -1)``.
        """
        x = x.astype(self.dtype)
        np.copyto(self.inputs[0].host, x.ravel())
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)
        # NOTE(review): execute_async(batch_size=...) targets implicit-batch
        # engines; an explicit-batch engine would need execute_async_v2 --
        # confirm how this engine was built.
        self.context.execute_async(
            batch_size=batch_size,
            bindings=self.bindings,
            stream_handle=self.stream.handle,
        )
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
        # All copies and the execution were queued on the same stream; wait
        # for them before reading the host buffers.
        self.stream.synchronize()
        return [out.host.reshape(batch_size, -1) for out in self.outputs]
def area_of(left_top, right_bottom):
    """Return the area of boxes given their two corner points.

    Args:
        left_top: Array whose last axis holds the first corner (x, y).
        right_bottom: Array whose last axis holds the opposite corner (x, y).

    Returns:
        Array of areas; boxes with a negative extent yield 0.
    """
    # Clip negative widths/heights to zero so inverted boxes have no area.
    hw = np.clip(right_bottom - left_top, 0.0, None)
    # The pasted original contained the typographic "…" character, which is
    # not valid Python; the Ellipsis literal is restored.
    return hw[..., 0] * hw[..., 1]
def iou_of(boxes0, boxes1, eps=1e-5):
    """Return the intersection-over-union of two sets of corner-form boxes.

    Args:
        boxes0: Array whose last axis is (x1, y1, x2, y2).
        boxes1: Array with the same layout as ``boxes0``; combined with it
            through NumPy broadcasting.
        eps: Small constant added to the denominator to avoid division by
            zero.

    Returns:
        Array of IoU values.
    """
    # The pasted original used the typographic "…" character in every index
    # expression; the Ellipsis literal is restored.
    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Apply hard non-maximum suppression.

    Args:
        box_scores: 2-D array; the last column is the score and the
            preceding columns are the box (corner form, as used by
            ``iou_of``).
        iou_threshold: Remaining boxes whose IoU with the picked box exceeds
            this value are discarded.
        top_k: Stop after this many boxes have been kept; values <= 0 keep
            all survivors.
        candidate_size: Only the ``candidate_size`` highest-scoring boxes are
            considered.

    Returns:
        The rows of ``box_scores`` that were kept, highest score first.
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    # The pasted original had lost the "[]" literal here ("picked =").
    picked = []
    # argsort is ascending, so the best candidate is always the last index.
    indexes = np.argsort(scores)[-candidate_size:]
    while len(indexes) > 0:
        current = indexes[-1]
        picked.append(current)
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        indexes = indexes[:-1]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0))
        # Keep only candidates that do not overlap the picked box too much.
        indexes = indexes[iou <= iou_threshold]
    return box_scores[picked, :]
def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter raw detections by score, run NMS per class and scale the boxes.

    Args:
        width: Factor applied to the x coordinates of the kept boxes.
        height: Factor applied to the y coordinates of the kept boxes.
        confidences: 2-D array indexed as ``[box, class]``; column 0 is
            skipped (presumably the background class -- confirm against the
            model).
        boxes: 2-D array indexed as ``[box, coord]`` with four coordinates,
            presumably normalized to [0, 1] since they are multiplied by
            ``width``/``height``.
        prob_threshold: Minimum score for a box to be considered.
        iou_threshold: IoU threshold forwarded to ``hard_nms``.
        top_k: Forwarded to ``hard_nms``.

    Returns:
        Tuple ``(boxes, labels, probs)``: int32 boxes scaled by
        ``width``/``height``, class indices, and scores. Three empty arrays
        are returned when nothing passes the threshold.
    """
    # The pasted original had lost the "[]" literals in this function.
    picked_box_probs = []
    picked_labels = []
    for class_index in range(1, confidences.shape[1]):
        probs = confidences[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = boxes[mask, :]
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=top_k)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])
    if not picked_box_probs:
        # np.array() without an argument raises TypeError; return empties.
        return np.array([]), np.array([]), np.array([])
    picked_box_probs = np.concatenate(picked_box_probs)
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]
# Driver script: run the engine on every .jpg/.png in `input_folder` and save
# each detected region as a separate image in `output_folder`.
batch_size = 1
threshold = 0.2
trt_engine_path = "LPDNet/yolov4_tiny_usa_deployable.etlt_b16_gpu0_fp16.engine"
input_folder = "input_images"
output_folder = "detected_number_plates"
try:
    model = TrtModel(trt_engine_path)
    shape = model.engine.get_binding_shape(0)
    # cv2.imwrite does not create missing directories, so make sure the
    # destination exists before writing any crop.
    os.makedirs(output_folder, exist_ok=True)
    for filename in os.listdir(input_folder):
        if not filename.endswith((".jpg", ".png")):
            continue
        image_path = os.path.join(input_folder, filename)
        frame = cv2.imread(image_path)
        if frame is None:
            # cv2.imread returns None for unreadable files; skip them rather
            # than aborting the whole run.
            print(f"Error: could not read {image_path}")
            continue
        frame = imutils.rotate_bound(frame, 270)
        h, w, _ = frame.shape
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # NOTE(review): the 320x240 size and the 127/128 normalization are
        # hard-coded; confirm that they match this engine's input binding
        # (`shape`) and its expected preprocessing.
        image1 = cv2.resize(image, (320, 240))
        image_mean = np.array([127, 127, 127])
        image = (image1 - image_mean) / 128
        image = np.transpose(image, [2, 0, 1])
        image = np.expand_dims(image, axis=0).astype(np.float32)
        # NOTE(review): assumes the engine has exactly two outputs, in the
        # order (confidences, boxes) -- confirm against the engine.
        confidences, boxes = model(image, batch_size)
        # Scale boxes to the rotated frame, because the crops below are taken
        # from `frame`, not from the 320x240 resized copy.
        boxes, labels, probs = predict(w, h, confidences, boxes, threshold)
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = (int(v) for v in box)
            # Clamp to the frame so negative coordinates do not wrap around.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, w), min(y2, h)
            plate_image = frame[y1:y2, x1:x2]
            if plate_image.size == 0:
                continue
            output_path = os.path.join(output_folder, f"{filename.split('.')[0]}_{i}.jpg")
            cv2.imwrite(output_path, plate_image)
    print("Detection and saving completed.")
except Exception as e:
    print(f"Error: {e}")

AakankshaS · July 11, 2024, 8:38am

Hi @Koushik-2K ,
I would recommend that you reach out to the DeepStream or Jetson forum for better assistance.

Thanks

Koushik-2K · July 12, 2024, 4:42am

Thank you for your reply; I will make the change.

Topic		Replies	Views
How to run the deepstream model locally in jetson? DeepStream SDK	3	131	August 9, 2024
Can't get TLT trained model get to work on Deepstream - Jetson (NX) TAO Toolkit	2	914	October 12, 2021
How to build my own deepstream project Jetson Xavier NX	5	465	September 12, 2021
How do we write business logic on top of the model trained with TLT? DeepStream SDK tensorrt , opencv , ubuntu , jetson-inference , python	3	408	October 12, 2021
Industrial defect detection DeepStream SDK	2	490	October 12, 2021
Can't get TLT trained model get to work on Deepstream - Jetson (NX) DeepStream SDK	4	1274	October 12, 2021
Can not run inference for 1 image in Jetson Xavier NX, but I can run on PC? DeepStream SDK jetson-inference	9	560	May 22, 2023
DeepStream run it on a single image? DeepStream SDK	8	1961	October 12, 2021
squeezenet with Deepstream General	5	1536	January 29, 2018
Human Pose detection model - Isses with converted model output in Deepstream DeepStream SDK tensorrt , onnx , deepstream	14	1615	September 19, 2022

How to run the deepstream model locally in jetson?

Related topics