Bad performance using jetson-inference C++

Hello,

I’m using the jetson-inference detectNet on an RTSP stream. Using the Python examples, this works fine.
Python snippet:

    import jetson_inference
    from jetson_utils import videoSource, videoOutput

    input_stream = videoSource(input_stream_link, argv=[f'--input-width={width}', f'--input-height={height}', '--input-frameRate=10'])
    output_stream_link = "rtsp://172.16.160.37:855" + str(5+index) + "/AI_stream" + str(index)
    output_stream = videoOutput(output_stream_link, argv=[f'--output-width={width}', f'--output-height={height}', '--output-frameRate=10', '--output-codec=h264'])

    net = jetson_inference.detectNet("ssd-mobilenet-v2", threshold=0.3)

    print(f"output_stream_link: {output_stream_link}")

    i = 0
    timeout = 20000

    while True:
        img = input_stream.Capture()
        if img is None or i == timeout:
            print(f"Exiting Thread {index}")
            break
        detections = net.Detect(img)
        output_stream.Render(img)
        i += 1

The detection works as expected. Now, while integrating this into our C++ application, we run into some issues: the detections are all over the place. Where the Python application draws a bounding box neatly around a person, the C++ application draws boxes seemingly at random (using the same Jetson Nano for both runs).

C++ Snippet:

inference.h
    uchar3* img_RGB = NULL;
    detectNet* net = NULL;
    detectNet::Detection* detections = NULL;
    int numDetections = 0;
    uint32_t overlayFlags = detectNet::OverlayFlagsFromStr("overlay,box,labels,conf");
inference.cpp
int Inference::do_inference(NvEglImage* frame, int width, int height){
    size_t img_RGB_size = width * height * sizeof(uchar3);

    CUresult status;
    cudaError cuda_error;
    CUeglFrame egl_frame;
    CUgraphicsResource pResource = NULL;

    cudaFree(0);
    status = cuGraphicsEGLRegisterImage(&pResource, frame->image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (status != CUDA_SUCCESS){
        g_warning("cuGraphicsEGLRegisterImage failed: %d\n", status);
        return status;
    }
    status = cuGraphicsResourceGetMappedEglFrame(&egl_frame, pResource, 0, 0);
    if (status != CUDA_SUCCESS){
        g_warning("cuGraphicsSubResourceGetMappedArray failed: %d\n", status);
        return status;
    }

    cuda_error = cudaMallocManaged (&img_RGB, img_RGB_size);
    if(cuda_error != cudaSuccess){
        g_warning("cudaMallocManaged failed: %d", cuda_error);
        return cuda_error;
    }

    // convert from I420 to RGB8
    cuda_error = cudaConvertColor(egl_frame.frame.pPitch[0], IMAGE_I420, img_RGB, IMAGE_RGB8 , width, height);

    if(cuda_error != cudaSuccess ) {
        g_warning("cudaConvertColor I420 -> RGB failed: %d", cuda_error);
        return cuda_error;
    }

    numDetections = net->Detect(img_RGB, width, height, &detections, overlayFlags);

    // convert back from RGB8 to I420
    cuda_error = cudaConvertColor(img_RGB, IMAGE_RGB8, egl_frame.frame.pPitch[0], IMAGE_I420, width, height);
    if(cuda_error != cudaSuccess ){
        g_warning("cudaConvertColor RGB -> I420 failed: %d", cuda_error);
        return cuda_error;
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS){
        g_warning("cuCtxSynchronize failed\n");
        return status;
    }

    last_height = height;
    last_width = width;
    cuGraphicsUnregisterResource(pResource);
    if (img_RGB != NULL){
        cudaFree(img_RGB);
    }
    return 0;
}

For context, a GStreamer pipeline passes the NvEglImage to the do_inference function. There I do the required conversions, run the detection, and write the result back into the NvEglImage* frame.
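To make the flow concrete, the appsink side looks roughly like the sketch below. The names are placeholders rather than our actual code: get_egl_image_from_sample stands in for the wrapper that turns the decoded NVMM buffer into our NvEglImage.

    // Simplified sketch of the appsink callback that feeds do_inference().
    static GstFlowReturn on_new_sample(GstAppSink* sink, gpointer user_data)
    {
        Inference* self = static_cast<Inference*>(user_data);

        GstSample* sample = gst_app_sink_pull_sample(sink);
        if (!sample)
            return GST_FLOW_ERROR;

        // Read the negotiated frame size from the sample caps.
        GstCaps* caps = gst_sample_get_caps(sample);
        GstStructure* s = gst_caps_get_structure(caps, 0);
        int width = 0, height = 0;
        gst_structure_get_int(s, "width", &width);
        gst_structure_get_int(s, "height", &height);

        // get_egl_image_from_sample() is a placeholder for the code that wraps the
        // NVMM buffer as an NvEglImage (EGLImageKHR plus bookkeeping).
        NvEglImage* frame = get_egl_image_from_sample(sample);
        if (frame)
            self->do_inference(frame, width, height);

        gst_sample_unref(sample);
        return GST_FLOW_OK;
    }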

Is there anything inherently wrong with the way I’m doing this?

Kind regards

Hi,

Do you use the same input data?
Would you mind feeding the same input as in Python, so we can tell whether the issue comes from the inference part or from the color conversion?

Thanks.

Yes, I’m using the same RTSP input for both programs at the same time. I should have mentioned this earlier.

EDIT:
The GStreamer launch string is:
this->launch_string =
"rtspsrc location=" + url + " latency=20 "
"! rtph264depay "
"! nvv4l2decoder "
"! nvvidconv "
"! video/x-raw(memory:NVMM),format=I420"
"! appsink name=srcvideosink sync=true";

Hi,

Sorry for the unclear comment.

In Python, the pre-processing is:

img = input_stream.Capture()

But in C++, the input goes through:

status = cuGraphicsResourceGetMappedEglFrame(&egl_frame, pResource, 0, 0);
...
cuda_error = cudaConvertColor(egl_frame.frame.pPitch[0], IMAGE_I420, img_RGB, IMAGE_RGB8 , width, height);

Would you mind double-checking whether img_RGB is really identical to the Python img?
A format difference could cause the accuracy drop you observe.

Thanks.

Currently I’m checking whether the intermediate image (img_RGB) is correct. When writing it to the file system using jetson-utils/imageIO, I get the following image:

There is something wrong with what I’m doing, but I’m unsure what it is. When printing the CUeglColorFormat after cuGraphicsEGLRegisterImage and cuGraphicsResourceGetMappedEglFrame, it prints 0, which should mean the image is in I420 format, right?
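The check itself is just printing the layout fields of the mapped CUeglFrame; a trimmed-down sketch of it (egl_frame comes from the same mapping code as in my first post):

    // Print the CUeglFrame fields that describe the mapped frame's layout.
    g_warning("eglColorFormat: %d", (int)egl_frame.eglColorFormat); // 0 == CU_EGL_COLOR_FORMAT_YUV420_PLANAR
    g_warning("frameType:      %d", (int)egl_frame.frameType);      // CUDA array vs. pitch-linear
    g_warning("planeCount:     %u", egl_frame.planeCount);
    g_warning("size: %ux%u, pitch: %u", egl_frame.width, egl_frame.height, egl_frame.pitch);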

Hi,

Could you try NV12 to see if it works?

Thanks.

I guess you mean in the cudaConvertColor call?

So right now I’m using:

cuda_error = cudaConvertColor(egl_frame.frame.pPitch[0], IMAGE_NV12, img_RGB, IMAGE_RGB8 , width, height);

    if(cuda_error != cudaSuccess ) {
        g_warning("cudaConvertColor I420 -> RGB failed: %d", cuda_error);
        return cuda_error;
    }

    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS){
        g_warning("cuCtxSynchronize failed\n");
        return status;
    }
    if (!test)
    {
        saveImage("test_RGB.jpg", img_RGB, width, height, IMAGE_RGB8, IMAGE_DEFAULT_SAVE_QUALITY, 0);
        g_warning("width: %d", width);
        g_warning("height: %d", height);
        test = true;
    }

This leaves me with the following:

Hi,

Could you share a runnable sample with us so we can give it a check?
Thanks

While looking some more into this issue, I printed the eglColorFormat:

    CUresult status;
    cudaError cuda_error;
    CUeglFrame egl_frame;
    CUgraphicsResource pResource = NULL;

    cudaFree(0);
    status = cuGraphicsEGLRegisterImage(&pResource, frame->image, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (status != CUDA_SUCCESS){
        g_warning("cuGraphicsEGLRegisterImage failed: %d\n", status);
        return status;
    }
    status = cuGraphicsResourceGetMappedEglFrame(&egl_frame, pResource, 0, 0);
    if (status != CUDA_SUCCESS){
        g_warning("cuGraphicsSubResourceGetMappedArray failed: %d\n", status);
        return status;
    }

    if (egl_frame.eglColorFormat != CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR) {
        g_warning("Unexpected EGL color format!: %s");
    }

The reported eglColorFormat is 0, meaning:
CU_EGL_COLOR_FORMAT_YUV420_PLANAR = 0x00
Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
(CUDA 10.2)

As I assume the jetson-inference color-conversion functions expect CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR, I can see where this goes wrong.

Is there an easy way with EGL to convert to CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR?
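For what it’s worth, the kind of workaround I have in mind is to repack the three mapped surfaces into one contiguous I420 buffer before calling cudaConvertColor. This is only a rough, untested sketch: it assumes a pitch-linear frame and that the chroma planes use half the luma pitch, which I haven’t verified.

    // Untested sketch: copy the three separate EGL surfaces (Y, U, V) into one
    // contiguous I420 buffer so cudaConvertColor() can treat it as a single planar image.
    // Assumes CU_EGL_FRAME_TYPE_PITCH and a chroma pitch of egl_frame.pitch / 2.
    uint8_t* i420 = NULL;
    const size_t i420_size = (size_t)width * height * 3 / 2;
    cudaMallocManaged(&i420, i420_size);

    // Y plane
    cudaMemcpy2D(i420, width,
                 egl_frame.frame.pPitch[0], egl_frame.pitch,
                 width, height, cudaMemcpyDeviceToDevice);
    // U plane
    cudaMemcpy2D(i420 + width * height, width / 2,
                 egl_frame.frame.pPitch[1], egl_frame.pitch / 2,
                 width / 2, height / 2, cudaMemcpyDeviceToDevice);
    // V plane
    cudaMemcpy2D(i420 + width * height + (width / 2) * (height / 2), width / 2,
                 egl_frame.frame.pPitch[2], egl_frame.pitch / 2,
                 width / 2, height / 2, cudaMemcpyDeviceToDevice);

    // The packed buffer can now be converted in one call.
    cuda_error = cudaConvertColor(i420, IMAGE_I420, img_RGB, IMAGE_RGB8, width, height);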

Hi,

We need to check this with our internal team and will provide more info later.

Thanks.

Hi,

In the meantime, could you update the GStreamer string to NV12 and try it again?

this->launch_string =
"rtspsrc location=" + url + " latency=20 "
"! rtph264depay "
"! nvv4l2decoder "
"! nvvidconv bl-output=0 "
"! video/x-raw(memory:NVMM),format=NV12"
"! appsink name=srcvideosink sync=true";

Thanks.
