CUDA kernel method run with 700 error

main.cpp

int main(int argc, char *argv)
{
int width = 2560;
int height = 1440;
cv::Mat img = cv::Mat::zeros(height, width, CV_8UC3);
size_t size = height * width * sizeof(uchar) * 3;
uchar *yuv_buffer = (uchar *)malloc(size);
memset(yuv_buffer, 0, size);
uchar *d_yuv;
CUDA_CHECK(cudaMalloc((void **)&d_yuv, size));
CUDA_CHECK(cudaMemset(d_yuv, 0, size));
uchar *d_src;
CUDA_CHECK(cudaMalloc((void **)&d_src, size));
CUDA_CHECK(cudaMemset(d_src, 0, size));
CUDA_CHECK(cudaMemcpy(d_yuv, yuv_buffer, size, cudaMemcpyHostToDevice));
gpuConvertYUV420ptoBGR24(d_yuv, d_src, width, height);
CUDA_CHECK(cudaMemcpy(img.data, d_src, size, cudaMemcpyDeviceToHost));
char out_tag_pic[1024];
std::string time_str = get_time_str();
sprintf(out_tag_pic, “%s.jpg”, time_str.c_str());
cv::imwrite(out_tag_pic, img);
free(yuv_buffer);
CUDA_CHECK(cudaFree(d_yuv));
CUDA_CHECK(cudaFree(d_src));
return 0;
}

tool.cu

device inline float clamp(float val, float mn, float mx)
{
return (val >= mn) ? ((val <= mx) ? val : mx) : mn;
}

global void gpuConvertYUYVtoBGR_kernel(unsigned char *src, unsigned char *dst,
unsigned int width, unsigned int height)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx > width)
{
return;
}
for (int i = 0; i < height; ++i)
{
int index_Y = i * width + idx;
int index_U = width * height + i / 2 * width / 2 + idx / 2;
int index_V = width * height + width * height / 4 + i / 2 * width / 2 + idx / 2;
unsigned char Y = src[index_Y];
unsigned char U = src[index_U];
unsigned char V = src[index_V];
unsigned char R = clamp(Y + 1.402 * (V - 128), 0.0f, 255.0f);
unsigned char G = clamp(Y - 0.34413 * (U - 128) - 0.71414 * (V - 128), 0.0f, 255.0f);
unsigned char B = clamp(Y + 1.772 * (U - 128), 0.0f, 255.0f);
dst[(i * width + idx) * 3 + 2] = R;
dst[(i * width + idx) * 3 + 1] = G;
dst[(i * width + idx) * 3 + 0] = B;
}
}

void gpuConvertYUV420ptoBGR24(unsigned char *src, unsigned char *dst, unsigned int width, unsigned int height)
{
unsigned int blockSize = 256;
unsigned int numBlocks = width / blockSize + 1;
gpuConvertYUYVtoBGR_kernel<<<numBlocks, blockSize>>>(src, dst, width, height);
}

it works well when i set width=1920 and height=1080.
however, it runs with 700 error when i set width=2560 and height=1440, error come from “dst[(i * width + idx) * 3 + 2] = R;dst[(i * width + idx) * 3 + 1] = G;dst[(i * width + idx) * 3 + 0] = B;”

env:

Hi,

Is this issue duplicated to the below topic or a new issue?

Thanks.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.