Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU)
Jetson and GPU
• DeepStream Version
6.2
• JetPack Version (valid for Jetson only)
5.1
• TensorRT Version
8.5.2.2
• NVIDIA GPU Driver Version (valid for GPU only)
525
• Issue Type( questions, new requirements, bugs)
Question
• Requirement details( This is for new requirement. Including the module name-for which plugin or for which sample application, the function description)
Deepstream FPS drops to 0 and the video sink gets stuck after processing the first 10 or so frames. The current model we use has a 99% GPU utilization rate on the Jetson device and less on a T4. I have noticed that the GPU usage abruptly falls to 0 once the FPS hits 0. And any print statements on the probe function which is connected to the source pad of the primary inference also stops printing. I am using a custom struct to pass information from the model by the same probe function mentioned above. And since there is no stacktrace available or error available, it has become impossible to monitor it. The nsys report generated is not opening in the nsights system either. Once I remove the probe function, it works properly. Attaching the function as below:
def sink_probe(pad, info, user_data):
gst_buffer = info.get_buffer()
batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
frame_list = batch_meta.frame_meta_list
while frame_list is not None:
try:
frame_meta = pyds.NvDsFrameMeta.cast(frame_list.data)
except StopIteration:
continue
frame_user_meta_list = frame_meta.frame_user_meta_list
while frame_user_meta_list is not None:
try:
frame_user_meta = pyds.NvDsUserMeta.cast(frame_user_meta_list.data)
except StopIteration:
continue
meta_type = frame_user_meta.base_meta.meta_type
if meta_type == pyds.NVDSINFER_TENSOR_OUTPUT_META:
tensor_meta = pyds.NvDsInferTensorMeta.cast(frame_user_meta.user_meta_data)
layer = pyds.get_nvds_LayerInfo(tensor_meta, 0)
ptr = ctypes.cast(pyds.get_ptr(layer.buffer), ctypes.POINTER(ctypes.c_float))
detections = np.ctypeslib.as_array(ptr, shape=(64512,6))
nms_dets = nms(detections=detections, image_size=(512, 512)).flatten().tolist()
data = pyds.alloc_rapid_struct(frame_user_meta)
data.num_detections = len(nms_dets) // 5
data.detections = nms_dets
frame_user_meta.user_meta_data = data
frame_user_meta.base_meta.meta_type = pyds.NvDsMetaType.NVDS_USER_META
try:
frame_user_meta_list = frame_user_meta_list.next
except StopIteration:
break
try:
frame_list = frame_list.next
except StopIteration:
break
return Gst.PadProbeReturn.OK
Pybind11 for custom metadata
#include "bind_string_property_definitions.h"
#include "include/bindrapidmeta.hpp"
namespace py = pybind11;
namespace pydeepstream {
void* copy_rapid_struct(void* data, void* user_meta) {
NvDsUserMeta* source_meta = (NvDsUserMeta*) data;
RapidOutputStruct* source_data = (RapidOutputStruct*) source_meta->user_meta_data;
RapidOutputStruct* destination_data = (RapidOutputStruct*) g_malloc0(sizeof(RapidOutputStruct));
int num_detections = source_data->num_detections;
destination_data->num_detections = source_data->num_detections;
if (num_detections > 0) {
for (int i = 0; i < num_detections * 5; i++) {
destination_data->detections.push_back(source_data->detections[i]);
}
}
return destination_data;
}
void release_rapid_struct(void* data, void* user_data) {
NvDsUserMeta* source_meta = (NvDsUserMeta*) data;
if (source_meta != nullptr) {
RapidOutputStruct* source_data = (RapidOutputStruct*) source_meta->user_meta_data;
if (source_data != nullptr) {
free(source_data);
}
}
}
void bindrapid(py::module &m) {
py::class_<RapidOutputStruct>(
m, "RapidOutputStruct", pydsdoc::rapid::RapidStructDoc::descr)
.def(py::init<>())
.def_readwrite("num_detections", &RapidOutputStruct::num_detections)
.def_readwrite("detections", &RapidOutputStruct::detections)
.def(
"cast", [](void* data) {
return (RapidOutputStruct*) data;
},
py::return_value_policy::reference,
pydsdoc::rapid::RapidStructDoc::cast
);
m.def(
"alloc_rapid_struct",
[](NvDsUserMeta *meta) {
auto* mem = (RapidOutputStruct* ) g_malloc0(sizeof(RapidOutputStruct));
meta->base_meta.copy_func = (NvDsMetaCopyFunc) pydeepstream::copy_rapid_struct;
meta->base_meta.release_func = (NvDsMetaReleaseFunc) pydeepstream::release_rapid_struct;
return mem;
},
py::return_value_policy::reference,
pydsdoc::methodsDoc::alloc_rapid_struct
);
}