Hi,
Here is how I currently copy the data, but it is too slow. Is there a way to do a device-to-device copy instead?
init() and process() work fine with one camera (60 fps); process() takes just 2-3 ms.
But when I use 4 cameras (each at 60 fps), process() takes 19 ms.
init()
{
…
NvBufferGetParams(m_argb_dmabuf_fd,&m_argb_parm);//pitch 9984
NvBufferMemMap(m_argb_dmabuf_fd, 0, NvBufferMem_Read_Write, &m_argb_dmabuf_buffer);
vesc_bayer_dev_ = nppiMalloc_8u_C1(CAMERA_IMAGE_WIDTH, CAMERA_IMAGE_HEIGHT, &resv_bayer_step_);//pitch 2560
resv_rgba_dev_ = nppiMalloc_8u_C4(CAMERA_IMAGE_WIDTH, CAMERA_IMAGE_HEIGHT, &resv_rgba_step_); //pitch 10240
}
process()
{
…
NvBufferMemSyncForCpu(m_argb_dmabuf_fd, 0, &m_argb_dmabuf_buffer);
cudaMemcpy2D(m_argb_dmabuf_buffer, m_argb_parm.pitch[0], resv_rgba_dev_, 2448*4, 2448*4, 2048, cudaMemcpyDeviceToHost);
NvBufferMemSyncForDevice(m_argb_dmabuf_fd, 0, &m_argb_dmabuf_buffer);
}
I am trying the code below, but it doesn’t work.
cudaMemcpy2D(m_argb_parm.nv_buffer, m_argb_parm.pitch[0], resv_rgba_dev_, 2448*4, 2448*4, 2048, cudaMemcpyDeviceToDevice);