Good morning,
I have the following code
==============================================================================
int W=1920;
int H=1200;
int nSrcStep = W;
cudaMallocManaged(&ptr_RGBCUDA, (3 * W * H));
cudaMallocManaged(&ptr_YCbCrCUDA, (3 * W * H) / 2);
Npp8u* pYCbCrDst[3];
pYCbCrDst[0] = (Npp8u*)ptr_YCbCrCUDA;
pYCbCrDst[1] = (Npp8u*)((long)ptr_YCbCrCUDA + (long)nSrcStep * (long)h);
pYCbCrDst[2] = (Npp8u*)((long)ptr_YCbCrCUDA + (long)nSrcStep * (long)h + (long)(nSrcStep * h / 4));
int steps[3];
steps[0] = nSrcStep;
steps[1] = nSrcStep / 2;
steps[2] = nSrcStep / 2;
stat = nppiRGBToYUV420_8u_C3P3R((Npp8u*)ptr_RGBCUDA, 3 * W, pYCbCrDst, steps, oSizeROI);
if (stat == NPP_SUCCESS)
{
cerr = cudaMemcpy(ptr_YCbCrHost, ptr_YCbCrCUDA, (3 * W * H) / 2, cudaMemcpyDeviceToHost);
printf(“\nnppiRGBToYUV420_8u_C3P3R result %i %i\n\n”, stat, cerr);
}
else printf(“\nnppiRGBToYUV420_8u_C3P3R error %i\n\n”, stat);
=====================================================================================
I’m testing the following code on two different hardware:
- Jetson Orin Nano and AGX with Jetpack 5.1.1 (Cuda version 11.4)
- Laptop with NVIDIA GeForce RTX 3050, Windows 11, driver version 536.99, Cuda version 12.2
If I run the code on Jetson Orin Nano everything works correctly. If I run the code on Laptop I have nppiRGBToYUV420_8u_C3P3R function that returns NPP_SUCCESS, but the following cudaMemcpy returns error 700 (cudaErrorIllegalAddress).
I tried to debug the code using compute-sanitizer and the result is that I had a lot of error lines like this
========= Invalid global write of size 1 bytes
========= at 0x910 in void ImageColorConversionKernel_4XX_8u<(NppColorModel)0, (NppPixelFormat)3, (NppColorModel)4, (NppPixelFormat)12>(const unsigned char *, const unsigned char *, const unsigned char *, const unsigned char *, int, int, int, unsigned char *, unsigned char *, unsigned char *, unsigned char *, int, int, int, unsigned int, unsigned int)
========= by thread (11,1,0) in block (1,2,0)
========= Address 0xa3462b is out of bounds
It seems is something related to memory access, but I cannot understand where is the issue
Regards