Hello, I want to convert the example code ConvolutionFFT2D to ConvolutionFFT3D, i.e. perform 3D FFT convolution in CUDA. This is my first time programming in CUDA.
Most of the code is straightforward to change from 2D to 3D, but I have run into some problems.
I’m a bit confused about the memory allocation: why is the memory for a_Kernel allocated with cudaMallocArray, while d_PaddedKernel is allocated with cudaMalloc?
Is there a way to use cudaMallocArray with 3 dimensions instead of 2? I tried it but the compiler complained.
Why do I need to bind a texture to the memory allocated with cudaMallocArray? Is it because texture memory is faster?
I changed tex2D to tex3D in the kernel code, but how do I bind to a 3D texture if I cannot use cudaMallocArray for 3D?
Are the textures only used for padding the data, and not for the actual FFT calculations?
// Allocate host buffers, 3D CUDA arrays for the texture-fetched inputs, and
// linear device buffers for the padded FFT operands; then upload the inputs
// and bind them to the texture references.
//
// NOTE(review): assumes a_Kernel / a_Data are declared as cudaArray* and that
// texKernel / texData are declared as 3D texture references
// (texture<float2, 3, ...>) elsewhere in the file -- confirm.
// NOTE(review): assumes KERNEL_SIZE == KERNEL_W * KERNEL_H * KERNEL_D * sizeof(Complex)
// and DATA_SIZE == DATA_W * DATA_H * DATA_D * sizeof(Complex) -- confirm.
printf("Allocating memory...\n");
h_Kernel = (Complex *)malloc(KERNEL_SIZE);
h_Data = (Complex *)malloc(DATA_SIZE);
h_ResultCPU = (Complex *)malloc(DATA_SIZE);
h_ResultGPU = (Complex *)malloc(FFT_SIZE);

// 2D original, kept for reference:
//cutilSafeCall( cudaMallocArray(&a_Kernel, &float2tex, KERNEL_W, KERNEL_H) );
//cutilSafeCall( cudaMallocArray(&a_Data, &float2tex, DATA_W, DATA_H) );

// cudaMallocArray only supports 1D/2D arrays. A 3D CUDA array is created with
// cudaMalloc3DArray and a cudaExtent whose width is given in ELEMENTS (not
// bytes) when the extent describes a cudaArray. Plain cudaMalloc memory cannot
// be handed to cudaMemcpyToArray / cudaBindTextureToArray.
cudaExtent kernelExtent = make_cudaExtent(KERNEL_W, KERNEL_H, KERNEL_D);
cudaExtent dataExtent = make_cudaExtent(DATA_W, DATA_H, DATA_D);
cutilSafeCall( cudaMalloc3DArray(&a_Kernel, &float2tex, kernelExtent) );
cutilSafeCall( cudaMalloc3DArray(&a_Data, &float2tex, dataExtent) );

// The padded operands are read/written by ordinary kernels and by cuFFT, so
// they stay in linear device memory.
cutilSafeCall( cudaMalloc((void **)&d_PaddedKernel, FFT_SIZE) );
cutilSafeCall( cudaMalloc((void **)&d_PaddedData, FFT_SIZE) );

printf("...copying input data and convolution kernel from host to CUDA arrays\n");
// cudaMemcpyToArray is 2D-only; 3D arrays are filled with cudaMemcpy3D.
// The host source is described as a tightly packed pitched pointer:
// pitch in bytes, then logical width/height in elements.
cudaMemcpy3DParms kernelCopy = {0};
kernelCopy.srcPtr = make_cudaPitchedPtr((void *)h_Kernel, KERNEL_W * sizeof(Complex), KERNEL_W, KERNEL_H);
kernelCopy.dstArray = a_Kernel;
kernelCopy.extent = kernelExtent;
kernelCopy.kind = cudaMemcpyHostToDevice;
cutilSafeCall( cudaMemcpy3D(&kernelCopy) );

cudaMemcpy3DParms dataCopy = {0};
dataCopy.srcPtr = make_cudaPitchedPtr((void *)h_Data, DATA_W * sizeof(Complex), DATA_W, DATA_H);
dataCopy.dstArray = a_Data;
dataCopy.extent = dataExtent;
dataCopy.kind = cudaMemcpyHostToDevice;
cutilSafeCall( cudaMemcpy3D(&dataCopy) );

printf("...binding CUDA arrays to texture references\n");
cutilSafeCall( cudaBindTextureToArray(texKernel, a_Kernel) );
cutilSafeCall( cudaBindTextureToArray(texData, a_Data) );