I’m trying to process image (1920x1080, BGR) on CUDA using OpenCV object, but every time some space on bottom are black or unprocessed. It is always 1/16 of image height. I’m using the code below with some variations:
__device__ uchar3 invertColor(uchar3 color) {
return make_uchar3(255U - color.x, 255U - color.y, 255U - color.z);
}
__global__ void processKernel(const uchar3 *input, uchar3 *output, int width, int height) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if(x < width && y < height) {
int idx = y * width + x;
// USER CODE BEGIN
output[idx] = invertColor(input[idx]);
// USER CODE END
}
}
void processImageWithCuda(const cv::cuda::GpuMat &src, cv::cuda::GpuMat &dst) {
int width = src.cols;
int height = src.rows;
dim3 blockSize(16, 16);
dim3 gridSize((width + blockSize.x - 1) / blockSize.x,
(height + blockSize.y - 1) / blockSize.y);
if(&src == &dst) {
cv::cuda::GpuMat result(src.size(), src.type());
processKernel<<<gridSize, blockSize>>>(src.ptr<uchar3>(),
result.ptr<uchar3>(), width, height);
cudaDeviceSynchronize();
dst = std::move(result);
} else {
processKernel<<<gridSize, blockSize>>>(src.ptr<uchar3>(), dst.ptr<uchar3>(),
width, height);
cudaDeviceSynchronize();
}
}
I was trying to change the order of data processed by each block, but it is always invalid the bottom part of the image, not the last blocks.
Thanks for your help in advance
More or less important info
- Host: Nvidia Xavier NX
- System: JetPack 5.1.2
- CUDA version: 11.4
- OpenCV version: 4.7
- C++ Compiler: GNU 9.4.0
- Build method: CMake 3.16