Object Detection inference problem: image updates but bounding box (bbox) is fixed to bbox from first frame

sangminsuh · May 7, 2025, 4:25am

system: jetson agx orin 64 dev kit.
env: docker container

import os
import cv2
import time
import json
import onnx
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import tensorflow as tf
from easydict import EasyDict
import numpy as np
import matplotlib.pyplot as plt

cfg_coco = EasyDict()
cfg_coco.idx2cls = {
‘1’: ‘person’, ‘2’: ‘bicycle’, ‘3’: ‘car’, ‘4’: ‘motorcycle’, ‘5’: ‘airplane’,
‘6’: ‘bus’, ‘7’: ‘train’, ‘8’: ‘truck’, ‘9’: ‘boat’, ‘10’: ‘traffic light’,
‘11’: ‘fire hydrant’, ‘13’: ‘stop sign’, ‘14’: ‘parking meter’, ‘15’: ‘bench’, ‘16’: ‘bird’,
‘17’: ‘cat’, ‘18’: ‘dog’, ‘19’: ‘horse’, ‘20’: ‘sheep’, ‘21’: ‘cow’,
‘22’: ‘elephant’, ‘23’: ‘bear’, ‘24’: ‘zebra’, ‘25’: ‘giraffe’, ‘27’: ‘backpack’,
‘28’: ‘umbrella’, ‘31’: ‘handbag’, ‘32’: ‘tie’, ‘33’: ‘suitcase’, ‘34’: ‘frisbee’,
‘35’: ‘skis’, ‘36’: ‘snowboard’, ‘37’: ‘sports ball’, ‘38’: ‘kite’, ‘39’: ‘baseball bat’,
‘40’: ‘baseball glove’, ‘41’: ‘skateboard’, ‘42’: ‘surfboard’, ‘43’: ‘tennis racket’, ‘44’: ‘bottle’,
‘46’: ‘wine glass’, ‘47’: ‘cup’, ‘48’: ‘fork’, ‘49’: ‘knife’, ‘50’: ‘spoon’,
‘51’: ‘bowl’, ‘52’: ‘banana’, ‘53’: ‘apple’, ‘54’: ‘sandwich’, ‘55’: ‘orange’,
‘56’: ‘broccoli’, ‘57’: ‘carrot’, ‘58’: ‘hot dog’, ‘59’: ‘pizza’, ‘60’: ‘donut’,
‘61’: ‘cake’, ‘62’: ‘chair’, ‘63’: ‘couch’, ‘64’: ‘potted plant’, ‘65’: ‘bed’,
‘67’: ‘dining table’, ‘70’: ‘toilet’, ‘72’: ‘tv’, ‘73’: ‘laptop’, ‘74’: ‘mouse’,
‘75’: ‘remote’, ‘76’: ‘keyboard’, ‘77’: ‘cell phone’, ‘78’: ‘microwave’, ‘79’: ‘oven’,
‘80’: ‘toaster’, ‘81’: ‘sink’, ‘82’: ‘refrigerator’, ‘84’: ‘book’, ‘85’: ‘clock’,
‘86’: ‘vase’, ‘87’: ‘scissors’, ‘88’: ‘teddy bear’, ‘89’: ‘hair drier’, ‘90’: ‘toothbrush’
}
cfg_coco.n_class = len(cfg_coco.idx2cls)
cfg_coco.n_anchors = 4
cfg_coco.box_params = 4
cfg_coco.obj_params = 1
cfg_coco.pred_dim = cfg_coco.box_params + cfg_coco.obj_params + cfg_coco.n_class
cfg_coco.clip_bound = 2.
cfg_coco.scale_offset = 0.5

cfg_coco.image_size = (512, 512, 3)
cfg_coco.grid_sizes = [64, 32, 16]

cfg_coco.max_output_size = 100
cfg_coco.iou_threshold = 0.5
cfg_coco.score_threshold = 0.5

cfg = cfg_coco

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def extract_onnx_io(onnx_path):
model = onnx.load(onnx_path)
graph = model.graph
def shape_from_tensor(tensor):
return [dim.dim_value if dim.dim_value>0 else -1
for dim in tensor.type.tensor_type.shape.dim]
inputs = [{“name”: inp.name, “shape”: shape_from_tensor(inp)}
for inp in graph.input
if inp.name not in {init.name for init in graph.initializer}]
outputs = [{“name”: out.name, “shape”: shape_from_tensor(out)}
for out in graph.output]
return {“inputs”: inputs, “outputs”: outputs}

ENGINE_PATH = “../engine_models/OD_coco.engine”
def build_engine(onnx_path, io_info):
if os.path.exists(ENGINE_PATH):
print(f"Loading serialized engine from {ENGINE_PATH}“)
with open(ENGINE_PATH, “rb”) as f:
return trt.Runtime(TRT_LOGGER).deserialize_cuda_engine(f.read())
print(“Building TensorRT engine…”)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, TRT_LOGGER)
with open(onnx_path, ‘rb’) as f:
if not parser.parse(f.read()):
for i in range(parser.num_errors):
print(parser.get_error(i))
raise RuntimeError(“ONNX parsing failed”)
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 28) # 256MiB
config.set_flag(trt.BuilderFlag.FP16) # mode
# optimization profile: batch=1 fixed
profile = builder.create_optimization_profile()
inp = io_info[“inputs”][0]
shape = inp[“shape”][:]
shape[0] = 1
profile.set_shape(inp[“name”], tuple(shape), tuple(shape), tuple(shape))
config.add_optimization_profile(profile)
serialized = builder.build_serialized_network(network, config)
with open(ENGINE_PATH, “wb”) as f:
f.write(serialized)
print(f"Engine serialized to {ENGINE_PATH}”)
return trt.Runtime(TRT_LOGGER).deserialize_cuda_engine(serialized)

def allocate_buffers(engine, context):
inputs, outputs, bindings = , ,
stream = cuda.Stream()
for idx in range(engine.num_bindings):
name = engine.get_binding_name(idx)

    # dynamic shape 처리
    if engine.binding_is_input(idx):
        shape = context.get_binding_shape(idx)
    else:
        shape = engine.get_binding_shape(idx)

    dtype = trt.nptype(engine.get_binding_dtype(idx))
    size  = abs(int(np.prod(shape)))
    host_mem = cuda.pagelocked_empty(size, dtype)
    dev_mem  = cuda.mem_alloc(host_mem.nbytes)
    bindings.append(int(dev_mem))
    if engine.binding_is_input(idx):
        inputs.append((name, host_mem, dev_mem, tuple(shape)))
    else:
        outputs.append((name, host_mem, dev_mem, tuple(shape)))
return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream):
for _, h, d, _ in inputs:
cuda.memcpy_htod_async(d, h, stream)
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
for _, h, d, _ in outputs:
cuda.memcpy_dtoh_async(h, d, stream)
stream.synchronize()
return {name: h.reshape(shape) for name, h, _, shape in outputs}

def decode_predictions_multiscale(preds, cfg):
boxes =
for grid in cfg.grid_sizes:
out = preds[str(grid)][0] # (grid,grid,anchors*pred_dim)
out = out.reshape((grid, grid, cfg.n_anchors, cfg.pred_dim))
cell = cfg.image_size[0] / grid
for i in range(grid):
for j in range(grid):
for a in range(cfg.n_anchors):
p = out[i,j,a]
score = p[4]
if score < cfg.score_threshold: continue
tx,ty,tw,th = p[:4]
cx,cy = (j+tx)*cell, (i+ty)*cell
tw = np.clip(tw, -cfg.clip_bound, cfg.clip_bound)
th = np.clip(th, -cfg.clip_bound, cfg.clip_bound)
bw = (np.exp(tw)-cfg.scale_offset)*cfg.image_size[0]
bh = (np.exp(th)-cfg.scale_offset)*cfg.image_size[1]
xmin, ymin = cx-bw/2, cy-bh/2
xmax, ymax = cx+bw/2, cy+bh/2
xmin, xmax = np.clip([xmin,xmax],0,cfg.image_size[0])
ymin, ymax = np.clip([ymin,ymax],0,cfg.image_size[1])
if xmax<=xmin or ymax<=ymin: continue
cls = int(np.argmax(p[5:]))
boxes.append([xmin,ymin,xmax,ymax, cls, score])
return boxes

def apply_nms(boxes, cfg):
if not boxes: return
arr = np.array(boxes, dtype=np.float32)
b = arr[:,:4]
scores = arr[:,5]
classes = arr[:,4].astype(np.int32)
idx, sc = tf.image.non_max_suppression_with_scores(
boxes=b, scores=scores,
max_output_size=cfg.max_output_size,
iou_threshold=cfg.iou_threshold,
score_threshold=cfg.score_threshold
)
idx = idx.numpy()
return [boxes[i] for i in idx]

ONNX_PATH = “../onnx_models/OD_coco.onnx”
def main():

io_info = extract_onnx_io(ONNX_PATH)
print("ONNX IO:", json.dumps(io_info, indent=2))
engine = build_engine(ONNX_PATH, io_info)

context = engine.create_execution_context()
context.set_binding_shape(0, (1, 512, 512, 3))  # NHWC 기준
inputs, outputs, bindings, stream = allocate_buffers(engine, context)

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

classes = list(cfg.idx2cls.values())
cmap = plt.get_cmap('tab20', cfg.n_class)
colors = [(int(c[2]*255),int(c[1]*255),int(c[0]*255)) for c in cmap.colors]

while True:
    ret, frame = cap.read()
    if not ret:
        break

    disp_frame = frame.copy()

    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    h0, w0 = img.shape[:2]
    img_resized = cv2.resize(img, cfg.image_size[:2][::-1])

    np.copyto(inputs[0][1], img_resized.ravel())

    start = time.time()
    trt_outs = do_inference(context, bindings, inputs, outputs, stream)
    duration = time.time() - start

    for name, arr in trt_outs.items():
        flat = arr.flatten()
        print(f"[RAW] {name} head5:", flat[:5]) 

    preds  = {name: trt_outs[name] for name in trt_outs}
    boxes  = decode_predictions_multiscale(preds, cfg)
    boxes  = apply_nms(boxes, cfg)

    for xmin, ymin, xmax, ymax, cls, score in boxes:
        sx = w0 / cfg.image_size[1]
        sy = h0 / cfg.image_size[0]
        x1, y1 = int(xmin * sx), int(ymin * sy)
        x2, y2 = int(xmax * sx), int(ymax * sy)
        color = colors[int(cls)]
        cv2.rectangle(disp_frame, (x1, y1), (x2, y2), color, 2)
        txt = f"{classes[int(cls)]}:{score:.2f}"
        cv2.putText(disp_frame, txt, (x1, max(y1-10,0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

    cv2.putText(disp_frame, f"FPS: {1/duration:.1f}", (10,30),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,0), 2)

    cv2.imshow("TRT Object Detection", disp_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
cv2.destroyAllWindows()

if name == “main”:
main()

I am experimenting with tensorRT inference in jetson using the above code. I have done custom object detection on the server and saw the result, and after changing the model to onnx, I am doing inference on jetson. However, the image obtained from the USB camera is continuously changing, but the bounding box detected is the same box obtained in the first frame. Then I get Cuda Runtime (invalid resource handle) as below. do_inference() is not working properly. I have been looking at it for a few days, but I can’t figure out the cause. can you help me?

[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:45] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:46] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:46] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:46] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:46] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]
[05/07/2025-04:14:46] [TRT] [E] 1: [genericReformat.cuh::copyVectorizedRunKernel::1579] Error Code 1: Cuda Runtime (invalid resource handle)
[RAW] 16 head5: [ 6.3183594e-01 7.1289062e-01 -5.9179688e-01 -5.7373047e-01
4.8398972e-04]
[RAW] 32 head5: [ 8.5986328e-01 5.1562500e-01 -4.3579102e-01 -4.8779297e-01
7.5531006e-04]
[RAW] 64 head5: [ 0.6069336 0.47436523 -0.42138672 -0.39257812 0.00462723]

SivaRamaKrishnaNV · May 7, 2025, 5:32pm

Dear @sangminsuh ,
Could you please share the docker container environment , TRT/jetpack details to reproduce the issue

Topic		Replies	Views
Inference of model using tensorflow/onnxruntime and TensorRT gives different result Jetson TX2 tensorrt	20	2528	October 18, 2021
Get wrong result when I using tensorRT to do inference, am I wrong to use ? Jetson TX2	18	2989	October 18, 2021
X264 and TensorRT sudden reboot (MJPG encoder not affected, but not fast enough) on Jetson Orin Nano Jetson Orin Nano tensorrt , jetson-inference , gstreamer , jetson	52	830	June 18, 2024
Tensorrt Inference in Real time Jetson Nano tensorrt , jetson-inference , gstreamer , python	8	1730	April 12, 2023
Extremely slow inference in TensorRT for live semantic segmentation model Jetson AGX Xavier tensorrt , tensorflow , jetson-inference	11	4380	April 12, 2022
Getting wrong result on ONNX file using cutom script. (TLT to ONNX) TAO Toolkit inception	9	108	August 27, 2024
ERORR with ONNX2TRT : Unknown embedded device detected Jetson Xavier NX onnx	18	4567	April 27, 2022
No result when using tensorRT Sample FasterRCNN with other images Jetson TX2	43	5942	October 18, 2021
Engine Plan Inference on JetsonTX2 Jetson TX2 tensorrt , python	11	1844	October 18, 2021
jetson-inference with OpenCV camera input? Jetson TX2 opencv	14	6213	October 18, 2021

Object Detection inference problem: image updates but bounding box (bbox) is fixed to bbox from first frame

Related topics