dps4 on xavier: failed to register EGLImage in cuda

i start a pipeline : filesrc ! h264parse ! nvv4l2decoder ! video/x-raw(memory:NVMM), format=NV12 ! fakesink
and i would like to get the gpu address from the pad of fakesink.

static GstPadProbeReturn decode_pad_buffer_probe_RT_yuv(GstPad * pad, GstPadProbeInfo * probe_info,
	gpointer u_data) {
	GstBuffer *inbuf = (GstBuffer *)probe_info->data;
	GstMapInfo in_map_info;
	GstFlowReturn flow_ret = GST_FLOW_OK;

	memset(&in_map_info, 0, sizeof(in_map_info));
	if (!gst_buffer_map(inbuf, &in_map_info, GST_MAP_READ)) {
		flow_ret = GST_FLOW_ERROR;
		printf("error\n");
	}

	NvBufSurface *surface = NULL;
	surface = (NvBufSurface *) in_map_info.data;
	NvBufSurfaceMapEglImage(surface,0);

	int image_height = surface->surfaceList->height;
	int image_width  = surface->surfaceList->width;

	CUresult status;
	CUeglFrame eglFrame;
	CUgraphicsResource pResource = NULL;
	int ret = cuGraphicsEGLRegisterImage(&pResource,
		surface->surfaceList->mappedAddr.eglImage,
		CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
	if (ret != CUDA_SUCCESS) {
      g_printerr ("Failed to register EGLImage in cuda : %d \n",ret);
    }

    if (cuGraphicsResourceGetMappedEglFrame (&eglFrame,
            pResource, 0, 0) != CUDA_SUCCESS) {
      g_printerr ("Failed to get mapped EGL Frame\n");
    }

	int yLen = (image_height*image_width);
	int frameLen = yLen + (yLen>>1);
	if (frameLen > surface->surfaceList->dataSize)
		frameLen = surface->surfaceList->dataSize;
	unsigned char* yuvData;
	cudaMalloc((void**)&yuvData, frameLen);
	cudaMemcpy(yuvData,(unsigned char *)eglFrame.frame.pPitch[0],frameLen,cudaMemcpyDeviceToDevice);

	status = cuCtxSynchronize();
	if (status != CUDA_SUCCESS)
	{
		printf("cuCtxSynchronize failed after memcpy\n");
	}

	static int i = 0;
	if (i == 0){
	
		int bsize = frameLen;
		char* yuvCh;
		cudaMallocHost(&yuvCh,bsize);
		cudaMemcpy(yuvCh, (unsigned char *)yuvData ,bsize , cudaMemcpyDeviceToHost);

		char fname[128];
		snprintf(fname,128,"yuv.data");
		printf("read yuv data file:%s\n",fname);
		FILE* sfile = fopen(fname,"w+");
		if (sfile){
			fwrite((char*)yuvCh,1,bsize,sfile);
			fclose(sfile);
		}
		cudaFreeHost(yuvCh);		
		i = 1;
	}

	status = cuGraphicsUnregisterResource(pResource);
	if (status != CUDA_SUCCESS)
	{
		printf("cuGraphicsEGLUnRegisterResource failed: %d\n", status);
	}

	gst_buffer_unmap(inbuf, &in_map_info);
	return GST_PAD_PROBE_OK;

}

but it failed to register EGLImage in cuda,the error return 201.error 201 means CUDA_ERROR_INVALID_CONTEXT.
how can i get the NV12 data ptr?

Hi,
You may try nvviafilter. Here is a sample:
https://devtalk.nvidia.com/default/topic/1046218/jetson-tx2/unable-to-overlay-text-when-using-udpsrc-/post/5310313/#5310313

Hi,there.I want to Map the NV12 data and write it into disk.But I don’t know how to do.And here is my code.Could you give me a sample?

Thanks

static GstPadProbeReturn decode_pad_buffer_probe_RT_yuv(GstPad * pad, GstPadProbeInfo * probe_info,
	gpointer u_data) {
	GstBuffer *inbuf = (GstBuffer *)probe_info->data;
	GstMapInfo in_map_info;
	GstFlowReturn flow_ret = GST_FLOW_OK;
	
	memset(&in_map_info, 0, sizeof(in_map_info));
	if (!gst_buffer_map(inbuf, &in_map_info, GST_MAP_READ)) {
		flow_ret = GST_FLOW_ERROR;
		printf("error\n");
	}

	NvBufSurface *surface = NULL;
	surface = (NvBufSurface *) in_map_info.data;

	NvBufSurfaceMap(surface, 0, 0, NVBUF_MAP_READ_WRITE);
	

	int image_width = surface->surfaceList->width;
	int image_height = surface->surfaceList->height;
	
	int yLen = (image_height*image_width);
	int frameLen = yLen + (yLen>>1);
	int rgbaLen = sizeof(unsigned char) * image_height * image_width * 4;
	unsigned char* yuvData;
	cudaHostAlloc((void**)&yuvData, frameLen ,cudaHostAllocMapped);
	cudaMemcpy(yuvData,(unsigned char *)surface->surfaceList[0].mappedAddr.addr[0],frameLen,cudaMemcpyHostToHost);
	
	static int i = 0;
	if (i == 0){
		printf("width:%d, height:%d, size:%d", image_width, image_height, frameLen);

		char fname[128];
		snprintf(fname,128,"yuv.data");
		printf("read yuv data file:%s\n",fname);
		FILE* sfile = fopen(fname,"w+");
		if (sfile){
			fwrite((char*)surface->surfaceList[0].mappedAddr.addr[0],1,frameLen,sfile);
			fclose(sfile);
		}		
		i = 1;
	}
	cudaFreeHost(yuvData);
	yuvData = nullptr;
	NvBufSurfaceUnMap(surface, 0, 0);
	gst_buffer_unmap(inbuf, &in_map_info);
	return GST_PAD_PROBE_OK;

}

int startpipeline(std::string video) {
	GMainLoop *loop = NULL;
  GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL,
      *decoder = NULL, *sink = NULL, *nvvidconv = NULL,
      *nvosd = NULL;
  GstBus *bus = NULL;
  guint bus_watch_id;
  GstPad *osd_sink_pad = NULL;

  /* Standard GStreamer initialization */
  loop = g_main_loop_new (NULL, FALSE);

  /* Create gstreamer elements */
  /* Create Pipeline element that will form a connection of other elements */
  pipeline = gst_pipeline_new ("dstest1-pipeline");

  /* Source element for reading from the file */
  source = gst_element_factory_make ("filesrc", "file-source");

  /* Since the data format in the input file is elementary h264 stream,
   * we need a h264parser */
  h264parser = gst_element_factory_make ("h264parse", "h264-parser");

  /* Use nvdec_h264 for hardware accelerated decode on GPU */
  decoder = gst_element_factory_make ("nvv4l2decoder", "nvv4l2-decoder");

  if (!pipeline) {
    g_printerr ("One element could not be created. Exiting.\n");
    return -1;
  }

  /* Use convertor to convert from NV12 to RGBA as required by nvosd */
  nvvidconv = gst_element_factory_make ("nvvideoconvert", "nvvideo-converter");

	GstCaps *caps1 = NULL, *caps2 = NULL;
	GstElement *filter1 = gst_element_factory_make("capsfilter", "filter1");
	GstElement *filter2 = gst_element_factory_make("capsfilter", "filter2");
	if (!filter1 || !filter2)
	{
		printf("filter could not be created. Exiting.\n");
		return -1;
	}

	caps1 = gst_caps_from_string("video/x-raw(memory:NVMM), format=NV12");
	g_object_set(G_OBJECT(filter1), "caps", caps1, NULL);
	gst_caps_unref(caps1);
	caps2 = gst_caps_from_string("video/x-raw(memory:NVMM), format=RGBA");
	g_object_set(G_OBJECT(filter2), "caps", caps2, NULL);
	gst_caps_unref(caps2);

/* Finally render the osd output */
  sink = gst_element_factory_make ("fakesink", "nvvideo-renderer");

  if (!source || !h264parser || !decoder
      || !nvvidconv || !sink) {
    g_printerr ("source,h264parser,decoder,nvvidconv,sink One element could not be created. Exiting.\n");
    return -1;
  }

  /* we set the input filename to the source element */
  g_object_set (G_OBJECT (source), "location", video.c_str(), NULL);

  /* we add a message handler */
  bus = gst_pipeline_get_bus (GST_PIPELINE (pipeline));
  bus_watch_id = gst_bus_add_watch (bus, bus_call, loop);
  gst_object_unref (bus);

  /* Set up the pipeline */
  /* we add all elements into the pipeline */
  gst_bin_add_many (GST_BIN (pipeline),
      source, h264parser, decoder,filter1,
      sink, NULL);

  if (!gst_element_link_many (source, h264parser, decoder, filter1,sink, NULL)) {
    g_printerr ("Elements could not be linked: 2. Exiting.\n");
    return -1;
  }

  GstPad *pad;
  pad = gst_element_get_static_pad(sink, "sink");
  gst_pad_add_probe(pad, GST_PAD_PROBE_TYPE_BUFFER, (GstPadProbeCallback)decode_pad_buffer_probe_RT_yuv, nullptr,nullptr);
  
  /* Set the pipeline to "playing" state */
  gst_element_set_state (pipeline, GST_STATE_PLAYING);
  
  /* Wait till pipeline encounters an error or EOS */
  g_print ("Running...\n");
  g_main_loop_run (loop);

  /* Out of the main loop, clean up nicely */
  g_print ("Returned, stopping playback\n");
  gst_element_set_state (pipeline, GST_STATE_NULL);
  g_print ("Deleting pipeline\n");
  gst_object_unref (GST_OBJECT (pipeline));
  g_source_remove (bus_watch_id);
  g_main_loop_unref (loop);
  return 0;
}

Hi,
We encourage users do integration. You may refer to the sample and add fopen(), fwrite(), fclose() to dump YUVs out.

Another quick way is to run

filesrc ! h264parse ! nvv4l2decoder ! video/x-raw(memory:NVMM), format=NV12 ! nvvidconv ! video/x-raw ! multifilesink location=dump%05d.nv12

Hi,I have do integration on the function of decode_pad_buffer_probe_RT_yuv.But I found the data is not right , and I have make sure that it can be worked to save RGBA data on this:

filesrc ! h264parse ! nvv4l2decoder ! video/x-raw(memory:NVMM), format=NV12 ! nvvidconv ! video/x-raw(memory:NVMM), format=RGBA ! fakesink

hi,
The output format of decoder is block linear. Please create a pitch linear buffer via NvBufSurfaceCreate() and call NvBufSurfTransform()

Hi,I am following your suggestion.And I have success to write the YUV data into file.But I found the data is not right totally.The data just have Y data,but not UV.Here is my code.Could you give me some suggestion?

static GstPadProbeReturn decode_pad_buffer_probe_RT_yuv(GstPad * pad, GstPadProbeInfo * probe_info,
	gpointer u_data) {
	GstBuffer *inbuf = (GstBuffer *)probe_info->data;
	GstMapInfo in_map_info;
	GstFlowReturn flow_ret = GST_FLOW_OK;
	
	memset(&in_map_info, 0, sizeof(in_map_info));
	if (!gst_buffer_map(inbuf, &in_map_info, GST_MAP_READ)) {
		flow_ret = GST_FLOW_ERROR;
		printf("error\n");
	}

	NvBufSurface *surface = NULL;
	surface = (NvBufSurface *) in_map_info.data;

	int image_width = surface->surfaceList->width;
	int image_height = surface->surfaceList->height;

	NvBufSurfTransformConfigParams transform_config_params;
	transform_config_params.compute_mode = NvBufSurfTransformCompute_Default;
	transform_config_params.gpu_id = 0;
	// Set the transform session parameters for the conversions executed in this
	// thread.
	NvBufSurfTransform_Error err = NvBufSurfTransformSetSessionParams (&transform_config_params);
	if (err != NvBufSurfTransformError_Success) {
		printf("error : NvBufSurfTransformSetSessionParams\n");
	}

	NvBufSurfaceCreateParams create_params;
	create_params.gpuId  = 0;
	create_params.width  = image_width;
	create_params.height = image_height;
	create_params.size = surface->surfaceList->dataSize;
	create_params.colorFormat = surface->surfaceList->colorFormat;
	create_params.layout = NVBUF_LAYOUT_PITCH;
#ifdef __aarch64__
	create_params.memType = surface->memType;//NVBUF_MEM_DEFAULT;
#else
  create_params.memType = surface->memType; //NVBUF_MEM_CUDA_UNIFIED;
#endif

	NvBufSurface *surface1 = NULL;
	if (NvBufSurfaceCreate (&surface1, 1,
			&create_params) != 0) {
		GST_ERROR ("Error: Could not allocate internal buffer for dsexample");
	}

	NvBufSurfTransformParams transform_params;
	transform_params.transform_flag = NVBUFSURF_TRANSFORM_FILTER;
	//transform_params.transform_filter = NvBufSurfTransformInter_Bilinear;


	err = NvBufSurfTransform (surface, surface1, &transform_params);
	if (err != NvBufSurfTransformError_Success) {
		printf("error : NvBufSurfTransform \n");
	}

	// Map the buffer so that it can be accessed by CPU
	if (NvBufSurfaceMap (surface1, 0, 0, NVBUF_MAP_READ_WRITE) != 0){
	printf("error : NvBufSurfaceMap \n");
	}

	int yLen = (image_height*image_width);
	int frameLen = yLen + (yLen>>1);
	int rgbaLen = sizeof(unsigned char) * image_height * image_width * 4;
	unsigned char* yuvData;
	cudaHostAlloc((void**)&yuvData,  frameLen ,cudaHostAllocMapped);
	cudaMemcpy(yuvData,(unsigned char *)surface1->surfaceList[0].mappedAddr.addr[0], frameLen,cudaMemcpyHostToHost);
	
	static int i = 0;
	if (i == 0){
		printf("width:%d, height:%d, size:%d", image_width, image_height, frameLen);

		char fname[128];
		snprintf(fname,128,"720p1.yuv");
		printf("read yuv data file:%s\n",fname);
		FILE* sfile = fopen(fname,"w+");
		if (sfile){
			fwrite((char*)surface1->surfaceList[0].mappedAddr.addr[0],1,frameLen,sfile);
			fclose(sfile);
		}		
		i = 1;
	}
	cudaFreeHost(yuvData);
	yuvData = nullptr;
	NvBufSurfaceUnMap(surface1, 0, 0);
	gst_buffer_unmap(inbuf, &in_map_info);
	return GST_PAD_PROBE_OK;

}

Hi,
For NV12, UV is on plane 1. For I420, U is on plane 1 and V is on plane 2. You have to map the plane.

int NvBufSurfaceMap (NvBufSurface *surf, int index, int plane, NvBufSurfaceMemMapFlags type);

Hi,
Thanks for your reply.My problem solved.

[quote]

Hi,
Thanks for your reply.My problem solved.

Using NvBufSurfaceMap will cause the CPU load to be too high. I want to use NvBufSurfaceMapEglImage instead, but what is the use of nvviafilter? I don’t understand what you mean.

Hi there,
I want to get the NV12 data and copy it.Now I can do this with NvBufSurfaceMap on cpu.
But how to do this on gpu. NvBufSurfaceMapEglImage supported only memory type NVBUF_MEM_SURFACE_ARRAY.

Hi,
Please check

tegra_multimedia_api\samples\common\algorithm\cuda\NvCudaProc.cpp

You can access EGLImage through CUDA.

Thanks for reply.
I have checked the sample,but how can I get the EGLImageKHR.And surface->surfaceList->mappedAddr.eglImage is NULL.
And I can not create EGLImageKHR through NvEGLImageFromFd(egl_display, surface1->surfaceList->bufferDesc).
Because dmabuf_fd of surface1->surfaceList->bufferDesc is not right.
Making sure we are talking about NV12.

Hi,
There is sample code in
deepstream_sdk_v4.0_jetson/sources/gst-plugins/gst-dsexample/gstdsexample.cpp

#ifdef __aarch64__
  // To use the converted buffer in CUDA, create an EGLImage and then use
  // CUDA-EGL interop APIs
  if (USE_EGLIMAGE) {
    if (NvBufSurfaceMapEglImage (dsexample->inter_buf, 0) !=0 ) {
      goto error;
    }
    g_print("eglimage=%p \n", dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage);
    // dsexample->inter_buf->surfaceList[0].mappedAddr.eglImage
    // Use interop APIs cuGraphicsEGLRegisterImage and
    // cuGraphicsResourceGetMappedEglFrame to access the buffer in CUDA

    // Destroy the EGLImage
    NvBufSurfaceUnMapEglImage (dsexample->inter_buf, 0);
  }
#endif

We have double confirmed eglimage is not NULL. You may refer to it and check where the deviation is between the sample and your implementation.

Thanks for your help.I get it.