Cropped bounding box video overlay next to original video

Hello,

I am running my-detection.py and am trying to output a video that has the input video + a cropped video of only the bounding box. I am first working on just the bounding box video.

I will do this for only a single object class to avoid confusion.

To start with I did a crop of the img using cudaCrop. I got an error:
cudaCrop(img, imgCropped, crop_roi)
ValueError: jetson.utils – cudaCrop() had an invalid ROI

So, I referred to Dusty's advice here:
https://forums.developer.nvidia.com/t/bounding-box-ssd-jetson-nano/163502

I copied and pasted the code from that thread (with some changes for the new jetson_utils and jetson_inference module names) and I still get the same error.

Thanks in advance for your help :)

import sys
import argparse
import cv2
import numpy as np
import serial
import time


from jetson_inference import detectNet
from jetson_utils import videoSource, videoOutput, logUsage, cudaDrawCircle, cudaDrawRect, cudaDrawLine, cudaFont, cudaAllocMapped, cudaOverlay, cudaResize, cudaCrop, saveImage


# Detection-snapshot script: runs detectNet on every frame captured from
# /dev/video0, crops each detection out of the frame with cudaCrop(), and
# saves the crop to disk as object_<N>.jpg.
net = detectNet("ssd-mobilenet-v2", threshold=0.5)
camera = videoSource("/dev/video0")      # '/dev/video0' for V4L2
display = videoOutput("display://0") # 'my_video.mp4' for file

object_count = 0    # keep track of the number of detected objects for the filenames

while True:
	img = camera.Capture()
	detections = net.Detect(img, overlay='none')    # disable drawing of overlay

	for detection in detections:
		# allocate the output image, with the cropped size
		# NOTE(review): the working script later in this thread sizes the
		# buffer from an int-cast ROI (right-left, bottom-top) rather than
		# from detection.Width/Height -- presumably these are floats; confirm.
		imgCropped = cudaAllocMapped(width=detection.Width,
			                                  height=detection.Height,
			                                  format=img.format)

		# get the cropping ROI from the bounding box
		# NOTE(review): this is the tuple cudaCrop() rejects with "invalid ROI".
		# The working script later in this thread wraps each coordinate in
		# int() -- likely the missing piece here; confirm.
		crop_roi = (detection.Left, detection.Top, detection.Right, detection.Bottom)

		# crop the image
		cudaCrop(img, imgCropped, crop_roi)

		# save the image
		saveImage('object_{:d}.jpg'.format(object_count), imgCropped)
		object_count += 1
		# drop the reference to the crop buffer before the next detection
		del imgCropped

	display.Render(img)
	display.SetStatus("Object Detection | Network {:.0f} FPS".format(net.GetNetworkFPS()))
    
	# NOTE(review): this snippet names its stream objects `camera` and
	# `display`; `input` and `output` are never assigned here. `input`
	# therefore resolves to the Python builtin, which has no IsStreaming
	# attribute, so this check raises instead of ending the loop.
	if not input.IsStreaming() or not output.IsStreaming():
		break
		
		```

Hi @sanespeed, there is a detectnet-snap.py example which crops the detection ROIs and saves them to disk:

https://github.com/dusty-nv/jetson-inference/blob/master/python/examples/detectnet-snap.py

If you have problems using it, let me know. Thanks!

2 Likes

Thank you Dusty, that worked. You’re a huge help, yet again :)

For anyone playing along at home, here’s the code that I implemented.

It works with 1 object class. Renders the input video. If there is an object detection, it renders the bounding box video next to the input video.

(I am a rookie at python, so please don’t judge too harshly if I’ve made mistakes. I’m happy to take feedback)

#!/usr/bin/env python3
#
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#

import argparse
import datetime
import math
import sys
import os

from jetson_inference import detectNet
from jetson_utils import (videoSource, videoOutput, logUsage, saveImage,
                          cudaAllocMapped, cudaCrop, cudaDeviceSynchronize,
                          cudaOverlay, cudaResize)

# Side-by-side detection viewer.
#
# Captures frames from a V4L2 camera, runs a custom-trained SSD detector on
# each frame and writes a composite video: the camera frame on the left and,
# when an object is detected, an enlarged crop of the first detection's
# bounding box in a fixed-width panel on the right.

# width (in pixels) of the panel reserved to the right of the camera frame
PANEL_WIDTH = 500

# parse the command line
parser = argparse.ArgumentParser(description="Locate objects in a live camera stream using an object detection DNN.", 
                                 formatter_class=argparse.RawTextHelpFormatter, 
                                 epilog=detectNet.Usage() + videoSource.Usage() + videoOutput.Usage() + logUsage())

parser.add_argument("input_URI", type=str, default="", nargs='?', help="URI of the input stream")
parser.add_argument("output_URI", type=str, default="", nargs='?', help="URI of the output stream")
parser.add_argument("--network", type=str, default="ssd-mobilenet-v2", help="pre-trained model to load (see below for options)")
parser.add_argument("--overlay", type=str, default="box,labels,conf", help="detection overlay flags (e.g. --overlay=box,labels,conf)\nvalid combinations are:  'box', 'labels', 'conf', 'none'")
parser.add_argument("--threshold", type=float, default=0.5, help="minimum detection threshold to use") 
parser.add_argument("--snapshots", type=str, default="images/test/detections", help="output directory of detection snapshots")
parser.add_argument("--timestamp", type=str, default="%Y%m%d-%H%M%S-%f", help="timestamp format used in snapshot filenames")

# argparse reports bad arguments (and --help) by exiting; intercept that so the
# full usage text, including the jetson-specific epilog, is always printed.
try:
	args = parser.parse_known_args()[0]
except:
	print("")
	parser.print_help()
	sys.exit(0)


# create video output object 
output = videoOutput("cool_detection_video.mp4")

# load the object detection network
net = detectNet(argv=[
	"--model=/home/<your-jetson>/jetson-inference/python/training/detection/ssd/models/<your-model>/ssd-mobilenet.onnx",
	"--labels=/home/<your-jetson>/jetson-inference/python/training/detection/ssd/models/<your-model>/labels.txt",
	"--input_blob=input_0",
	"--output_cvg=scores",
	"--output_bbox=boxes",
	"--threshold=0.2",
])

# create video source
# (both options need the leading "--"; the width option was missing it)
input = videoSource("/dev/video0", argv=["--input-height=720", "--input-width=1280"])


# process frames until the user exits
while True:
	# capture the next image
	img = input.Capture()

	# NOTE(review): recent jetson-utils releases return None from Capture()
	# on a timeout (the upstream detectnet.py sample skips such frames) --
	# confirm against the installed version.
	if img is None:
		continue

	# detect objects in the image (no overlay: the crop must stay clean)
	detections = net.Detect(img, overlay='none')

	# the composite canvas: camera frame on the left, crop panel on the right
	imgOutput = cudaAllocMapped(width=img.width + PANEL_WIDTH, height=img.height, format=img.format)
	cudaOverlay(img, imgOutput, 0, 0)

	# single detection only, as there is only 1 object class
	detection = next(iter(detections), None)

	if detection is not None:
		# cudaCrop() needs integer pixel coordinates
		roi = (int(detection.Left), int(detection.Top), int(detection.Right), int(detection.Bottom))
		cropWidth = roi[2] - roi[0]
		cropHeight = roi[3] - roi[1]

		# a box thinner than one pixel truncates to an empty ROI, which can
		# neither be allocated nor used as an aspect ratio -- skip it
		if cropWidth > 0 and cropHeight > 0:
			boundBox = cudaAllocMapped(width=cropWidth, height=cropHeight, format=img.format)
			cudaCrop(img, boundBox, roi)
			cudaDeviceSynchronize()

			# enlarge the crop to the panel width, keeping the aspect ratio of
			# the pixels that were actually cropped
			panelHeight = max(1, int(PANEL_WIDTH * cropHeight / cropWidth))
			boundBoxResized = cudaAllocMapped(width=PANEL_WIDTH, height=panelHeight, format=img.format)
			cudaResize(boundBox, boundBoxResized)
			cudaDeviceSynchronize()

			# place the enlarged crop to the right of the camera frame
			cudaOverlay(boundBoxResized, imgOutput, img.width, 0)
			del boundBox

	# make sure all GPU work on the canvas has finished, then render it
	cudaDeviceSynchronize()
	output.Render(imgOutput)

	# update the title bar
	output.SetStatus("{:s} | Network {:.0f} FPS".format(args.network, net.GetNetworkFPS()))

	# exit on input/output EOS
	if not input.IsStreaming() or not output.IsStreaming():
		break

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.