Software Version
DRIVE OS Linux 5.2.6 and DriveWorks 4.0
Hardware Platform
DRIVE AGX Xavier
Hi,
I am trying to use nppiFilterGaussBorder_8u_C1R, but whatever I do it returns -1000 (NPP_CUDA_KERNEL_EXECUTION_ERROR), and I don’t know what the issue could be.
cudaStream_t cudaStream;
if(auto const cudaRes{cudaStreamCreate(&cudaStream)}; cudaRes != cudaSuccess)
{
GST_ERROR("Cannot create cuda stream: %d", cudaRes);
}
if(nppGetStream() != cudaStream)
{
if(auto const setStreamResult{nppSetStream(cudaStream)}; setStreamResult != 0)
{
GST_ERROR("nppSetStream error: %d", setStreamResult);
}
}
Npp8u* cudaMem = nppsMalloc_8u(256 * 256);
size_t pitch = 256;
Npp8u* cudaMemDst = nppsMalloc_8u(256 * 256);
size_t pitchDst = 256;
if(cudaMem == nullptr)
{
GST_ERROR("nppsMalloc_8u failed");
throw std::runtime_error("Error malloc");
}
if(cudaMemDst == nullptr)
{
GST_ERROR("nppsMalloc_8u dst failed");
throw std::runtime_error("Error malloc dst");
}
if(auto const memsetRes{nppsZero_8u(cudaMem, 256 * 256)}; memsetRes != 0)
{
GST_ERROR("nppsZero_8u failed %d", memsetRes);
}
if(auto const memsetRes{nppsZero_8u(cudaMemDst, 256 * 256)}; memsetRes != 0)
{
GST_ERROR("nppsZero_8u dst failed %d", memsetRes);
}
if(auto const syncRes{cudaStreamSynchronize(cudaStream)}; syncRes != cudaSuccess)
{
GST_ERROR("cudaStreamSynchronize 1 failed %d", syncRes);
}
auto const nppiError{nppiFilterGaussBorder_8u_C1R(
cudaMem + 16 * pitch + 16,
pitch,
NppiSize{256, 256},
NppiPoint{16, 16},
cudaMemDst + 16 * pitchDst + 16,
pitchDst,
NppiSize{256 - (2 * 16), 256 - (2 * 16)},
NPP_MASK_SIZE_3_X_3,
NPP_BORDER_REPLICATE
)};
if(nppiError != 0)
{
GST_ERROR("nppiFilterGaussBorder_8u_C1R failed: %d", nppiError);
}
if(auto const syncRes{cudaStreamSynchronize(cudaStream)}; syncRes != cudaSuccess)
{
GST_ERROR("[BlurRectDrawer] cudaStreamSynchronize 3 failed %d", syncRes);
}
nppsFree(cudaMem);
nppsFree(cudaMemDst);
cudaStreamDestroy(cudaStream);
Could you please help me figure out the problem?