Hi all,
I wrote a simple CUDA kernel function which seems failing randomly.
Any help would be greatly appreciated. Thanks.
////////////////////
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
__global__ void test_kernel(float2 *dev_data, long width, long height, long height_2)
{
long i = threadIdx.x + blockIdx.x * blockDim.x;
long j = threadIdx.y + blockIdx.y * blockDim.y;
if(j < width && i < height_2)
{
dev_data[(i)*width + j].x = 0.0f;
dev_data[(i)*width + j].y = 0.0f;
dev_data[(i+height_2)*width + j].x = 0.0f;
dev_data[(i+height_2)*width + j].y = 0.0f;
}
if(j < width && i < height_2)
{
dev_data[(i)*width + j].x = 1.0f;
dev_data[(i)*width + j].y = 0.0f;
dev_data[(height-i-1)*width + j].x = 2.0f;
dev_data[(height-i-1)*width + j].y = 2.0f;
}
}
void test_cpu(float2 *host_data, long width, long height, long height_2)
{
long i;
long j;
for(i=0; i < height_2; i++)
{
for(j = 0; j < width; j++)
{
host_data[(i)*width + j].x = 0.0f;
host_data[(i)*width + j].y = 0.0f;
host_data[(i+height_2)*width + j].x = 0.0f;
host_data[(i+height_2)*width + j].y = 0.0f;
}
}
for(i=0; i < height_2; i++)
{
for(j = 0; j < width; j++)
{
host_data[(i)*width + j].x = 1.0f;
host_data[(i)*width + j].y = 0.0f;
host_data[(height-i-1)*width + j].x = 2.0f;
host_data[(height-i-1)*width + j].y = 2.0f;
}
}
}
int main()
{
long width = 1024;
long height = 2048;
float2 *dev_data;
(cudaMalloc((void**)&dev_data, height*width*sizeof(float2)));
float2 *host_data = (float2*)malloc(sizeof(float2)*width*height);
dim3 threads(16, 16);
dim3 grid;
grid.x = (height + 15) / 16;
grid.y = (width + 15) / 16;
test_kernel<<<grid, threads>>>(dev_data, width, height, height/2);
(cudaMemcpy(host_data, dev_data, height*width*sizeof(float2), cudaMemcpyDeviceToHost));
float2 *host_data_cpu = (float2*)malloc(sizeof(float2)*width*height);
test_cpu(host_data_cpu, width, height, height/2);
long i,j;
long errc = 0;
for(i=0; i < height; i++)
{
for(j = 0; j < width; j++)
{
if( host_data[(i)*width + j].x != host_data_cpu[(i)*width + j].x)
{
errc++;
}
if( host_data[(i)*width + j].y != host_data_cpu[(i)*width + j].y)
{
errc++;
}
}
}
printf("errc: %ld\n", errc);
if(errc != 0)
{
printf("cuda kernel error\n");
}
cudaFree(dev_data);
free(host_data);
free(host_data_cpu);
return 0;
}
//////////////////////////
Running results:
root@usr:/home# ./test
errc: 616132
cuda kernel error
root@usr:/home# ./test
errc: 610144
cuda kernel error
root@usr:/home# ./test
errc: 591468
cuda kernel error
root@usr:/home# ./test
errc: 607076
cuda kernel error
/////////////////////////////////
OS:
18.04.1-Ubuntu
GPU&CUDA:
Quadro RTX 8000 & 10.1