Hello everyone! I've been facing an issue for a while and couldn't find a solution for it. I think it is a bug inside the NVIDIA libraries. The overall structure of the application is that I read frames from a file via a GStreamer pipeline, process them, and push them to another GStreamer pipeline that encodes them into a file. The pipelines look like this:
Input pipeline: filesrc -> qtdemux -> h264parse -> nvv4l2decoder -> nvvidconv (video/x-raw(memory:NVMM), format=RGBA) -> appsink
Output pipeline: appsrc (video/x-raw(memory:NVMM), format=RGBA) -> nvvidconv -> (video/x-raw(memory:NVMM), format=NV12) -> nvv4l2h264enc -> h264parse -> qtmux -> filesink
Execution block is like this:
void executeThread()
{
NvBufSurfTransformConfigParams config_params;
config_params.compute_mode = NvBufSurfTransformCompute_GPU;
config_params.gpu_id = 0;
config_params.cuda_stream = m_cuda_stream;
if (NvBufSurfTransformSetSessionParams(&config_params) != NvBufSurfTransformError_Success)
{
LOG_ERROR("NvBufSurfTransform set session failed");
exit(-1);
}
while (true)
{
std::unique_lock<std::mutex> lock(m_execute_mutex);
m_execute_cond.wait(lock);
NvBufSurface *in_surface = NULL;
NvBufSurfTransformParams transform_params;
memset(&transform_params, 0, sizeof(NvBufSurfTransformParams));
GstElement *input_pipeline = getPipeline(getIntputPipelineString(m_options).c_str());
GstElement *output_pipeline = getPipeline(getOutputPipelineString(m_options).c_str());
GstElement *appsink = gst_bin_get_by_name(GST_BIN(input_pipeline), "appsink0");
GstElement *appsrc = gst_bin_get_by_name(GST_BIN(output_pipeline), "appsrc0");
if (gst_element_set_state(input_pipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE)
{
LOG_ERROR("Input Pipeline is unable to be set playing!");
break;
}
if (gst_element_set_state(output_pipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE)
{
LOG_ERROR("Output Pipeline is unable to be set playing!");
break;
}
GstFlowReturn ret;
GstBufferPool *pool = getBufferPool(m_options);
GstBuffer *output_buffer = NULL;
GstMemory *mem = NULL;
int frame_id = 0;
bool error = false;
while (!error)
{
GstSample *sample = gst_app_sink_pull_sample(GST_APP_SINK(appsink));
if (!sample)
break;
GstBuffer *input_buffer = gst_sample_get_buffer(sample);
GstMapInfo inmap;
if (!gst_buffer_map(input_buffer, &inmap, GST_MAP_READ))
break;
in_surface = (NvBufSurface *)inmap.data;
if (gst_buffer_pool_acquire_buffer(pool, &output_buffer, NULL) != GST_FLOW_OK)
{
LOG_ERROR("Buffer cannot be acquired from pool.");
break;
}
if (!gst_buffer_peek_memory(output_buffer, 0))
{
LOG_ERROR("Memory block of the buffer is not available.");
break;
}
GstMapInfo outmap;
if (!gst_buffer_map(output_buffer, &outmap, GST_MAP_WRITE))
break;
NvBufSurface *surf = (NvBufSurface *)outmap.data;
if (NvBufSurfTransform(in_surface, surf, &transform_params) != 0)
{
LOG_ERROR("NvBufSurfTransform Failed");
error = true;
break;
}
gst_buffer_unmap(output_buffer, &outmap);
gst_buffer_unmap(input_buffer, &inmap);
gst_sample_unref(sample);
GST_BUFFER_PTS(output_buffer) = input_buffer->pts;
g_signal_emit_by_name(appsrc, "push-buffer", output_buffer, &ret);
gst_buffer_unref(output_buffer);
frame_id++;
}
/* Clean up */
gst_object_unref(appsink);
gst_object_unref(appsrc);
gst_buffer_pool_set_active(pool, FALSE);
gst_object_unref(pool);
cleanUpPipeline(input_pipeline);
cleanUpPipeline(output_pipeline);
{
std::unique_lock<std::mutex> lock(m_is_running_mutex);
m_is_running = false;
}
LOG_INFO("Execution finished.");
}
}
If I remove the part that sets NvBufSurfTransformConfigParams to NvBufSurfTransformCompute_GPU, it works properly, since it then uses the VIC to copy in_surface to surf (the output buffer). However, I need to do CUDA operations (please refer to gst-nvdewarper; I will warp frames using the NVWarp API).
The issue does not always occur; sometimes the output is correct.
ORIN NX 16 GB - Auvidea JNX42 Carrier board - Jetpack 5.1.1 - L4T 35.3.1
I’m attaching example videos:
video0.mp4: frames copied by VIC
video1.mp4: frames copied by GPU and error type 1
video2.mp4: frames copied by GPU and error type 2
videos.zip (5.2 MB)
If there is no fix for this, is there a way to copy the contents of memory allocated by cudaMalloc to an NvBufSurface with mem_type NVBUF_MEM_SURFACE_ARRAY?
