When I call nppiNV12ToBGR_709HDTV_8u_P2C3R() on Windows 11 with CUDA SDK 12.1, using widths that are not multiples of 4, and I reset the device (cudaDeviceReset()) between calls, nppiNV12ToBGR_709HDTV_8u_P2C3R() either returns NPP_CUDA_KERNEL_EXECUTION_ERROR or throws an exception.
For example, the following fails for me on the second call to nppCaller():
bool nppCaller(const size_t width) {
if(cudaSetDevice(0) != cudaSuccess)
return false;
Npp8u* luma = 0, * chroma = 0, * bgr = 0;
size_t pitchLuma = 0, pitchChroma = 0, pitchBgr = 0, height = 32;
if (cudaMallocPitch(&luma, &pitchLuma, width, height) != cudaSuccess)
return false;
if (cudaMallocPitch(&chroma, &pitchChroma, width, height / 2) != cudaSuccess)
return false;
if (cudaMallocPitch(&bgr, &pitchBgr, width * 3, height) != cudaSuccess)
return false;
Npp8u* pSrc[2] = { luma, chroma };
NppiSize oSizeROI = { width, height };
if (nppiNV12ToBGR_709HDTV_8u_P2C3R(pSrc, pitchLuma, bgr, pitchBgr, oSizeROI) != NPP_NO_ERROR)
return false;
return true;
}
const size_t width = 321;
if (!nppCaller(width)) cout << "FAIL" << endl;
if ( cudaDeviceReset() != cudaSuccess) cout << err << endl;
if (!nppCaller(width)) cout << "FAIL" << endl;
I am not sure whether this is an error, a careless mistake on my part, or a restriction on the width that I am not aware of. Additionally, I can’t determine whether the error is a result of calling cudaDeviceReset() or whether cudaDeviceReset() is merely exposing the error.
Any help would be appreciated.