Hi, I’m confused why initcheck reports errors with this code (cuda10.2, win10, RTX 2080 Ti):
#include "cuda_runtime.h"
#include <nppi.h>
#include <cstdio>
#include <random>
#include <ctime>
template <typename T>
void _check(T result, char const* const func, char const* const file, int const line)
{
if (result) {
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
static_cast<unsigned int>(result), cudaGetErrorName(result), func);
cudaDeviceReset();
exit(EXIT_FAILURE);
}
}
#define checkCudaErrors(val) _check((val), #val, __FILE__, __LINE__)
constexpr size_t h = 987;
constexpr size_t w = 988;
constexpr size_t c = 3;
constexpr size_t size = h * w * c;
unsigned char h_src[size];
unsigned char h_dst[size];
int main()
{
std::srand(std::time(nullptr));
for (size_t i = 0; i < size; ++i)
h_src[i] = std::rand() / ((RAND_MAX + 1) / 255);
unsigned char* d_srcDst;
size_t step = 0;
checkCudaErrors(cudaMallocPitch(&d_srcDst, &step, w * c, h));
checkCudaErrors(cudaMemcpy2D(d_srcDst, step, h_src, w * c, w * c, h, cudaMemcpyHostToDevice));
NppStatus status = nppiMirror_8u_C3IR(d_srcDst, step, NppiSize{w, h}, NPP_HORIZONTAL_AXIS);
if (status != NPP_NO_ERROR)
{
printf("npp error\n");
return 1;
}
checkCudaErrors(cudaMemcpy2D(h_dst, w * c, d_srcDst, step, w * c, h, cudaMemcpyDeviceToHost));
for (size_t _h = 0; _h < h; ++_h)
for (size_t _w = 0; _w < w; ++_w)
for (size_t _c = 0; _c < c; ++_c)
{
const unsigned char src_val = h_src[_h * w * c + _w * c + _c];
const unsigned char dst_val = h_dst[(h - 1 - _h) * w * c + _w * c + _c]; // horizontal flip
if (src_val != dst_val)
{
printf("wrong!\n");
return 1;
}
}
printf("complete\n");
return 0;
}
Output of initcheck:
========= Uninitialized __global__ memory read of size 1
========= at 0x00000160 in void inPlaceMirrorKernel<unsigned char, unsigned int=3, NppiAxis=0>(Pixel<unsigned char, unsigned int=3>*, unsigned int, NppiSize, int, int)
========= by thread (28,3,0) in block (0,3,0)
========= Address 0xb00ecf454
========= Device Frame:void inPlaceMirrorKernel<unsigned char, unsigned int=3, NppiAxis=0>(Pixel<unsigned char, unsigned int=3>*, unsigned int, NppiSize, int, int) (void inPlaceMirrorKernel<unsigned char, unsigned int=3, NppiAxis=0>(Pixel<unsigned char, unsigned int=3>*, unsigned int, NppiSize, int, int) : 0x160)
========= Saved host backtrace up to driver entry point
========= Host Frame:C:\Windows\system32\nvcuda.dll (cuLaunchKernel + 0x218) [0x1c9718]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nppig64_10.dll (nppiWarpPerspectiveQuad_8u_P4R_Ctx + 0x7f6) [0x18ef06]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nppig64_10.dll (nppiWarpPerspectiveQuad_8u_P4R_Ctx + 0x3c4) [0x18ead4]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nppig64_10.dll [0xfc59]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nppig64_10.dll [0x149d]
========= Host Frame:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nppig64_10.dll (nppiMirror_8u_C3IR + 0x81) [0x18e11]
========= Host Frame:E:\repos\opencv_cuda_test\x64\Release\npp_mirror_bug_report_10.2.exe (main + 0x148) [0xdfd8]
========= Host Frame:E:\repos\opencv_cuda_test\x64\Release\npp_mirror_bug_report_10.2.exe (__scrt_common_main_seh + 0x10c) [0xe344]
========= Host Frame:C:\Windows\System32\KERNEL32.DLL (BaseThreadInitThunk + 0x14) [0x17bd4]
========= Host Frame:C:\Windows\SYSTEM32\ntdll.dll (RtlUserThreadStart + 0x21) [0x6ce51]
=========
========= ERROR SUMMARY: 102816 errors