FFT not working when other kernel is present FFT issues

I’m trying to port some code to CUDA but ran into a problem with using the cuFFT tool. I’ve managed to reproduce the error in the following code:

#define _USE_MATH_DEFINES 1

#include <math.h>

#include

#include <time.h>

#include

using namespace std;

#include <cuda_runtime.h>

#include <cufft.h>

global void mean();

int main(int argc, char* argv)

{

int n = 64;

int size = n*n;

int ret = 0;	

cufftHandle plan;

cufftComplex *idata, *odata, host_array;

for (int counter =0; counter < size; counter++)

{

	host_array[counter].x = counter;

	host_array[counter].y = counter*counter;

}

ret = cudaMalloc((void**)&idata, sizeof(cufftComplex)*size);

ret = cudaMalloc((void**)&odata, sizeof(cufftComplex)*size);

mean <<<1, n*n>>> ();

/* Create a 2D FFT plan. */

ret = cufftPlan2d(&plan, n, n, CUFFT_C2C);

ret = cudaMemcpy(idata, host_array, sizeof(cufftComplex)*size, cudaMemcpyHostToDevice);

/* Use the CUFFT plan to transform the signal out of place. */

ret = cufftExecC2C(plan, idata, odata, CUFFT_FORWARD);

/* Note: idata != odata indicates an out-of-place transformation

	 to CUFFT at execution time. */

ret = cudaMemcpy(host_array, odata, sizeof(cufftComplex)*size, cudaMemcpyDeviceToHost);

/* Inverse transform the signal in place */

ret = cufftExecC2C(plan, odata, odata, CUFFT_INVERSE);

/* Destroy the CUFFT plan. */

ret = cufftDestroy(plan);

cudaFree(idata); cudaFree(odata);

return 0;

}

global void mean()

{

}

I’m running Ubuntu 9.10 with a GeForce 210 installed.

user@computer:~/Documents/cudadev$ uname -a

Linux 2.6.31-22-generic #65-Ubuntu SMP Thu Sep 16 16:21:34 UTC 2010 x86_64 GNU/Linux

user@computer:~/Documents/cudadev$ nvcc --version

nvcc: NVIDIA ® Cuda compiler driver

Copyright © 2005-2010 NVIDIA Corporation

Built on Tue_Apr_27_19:18:26_PDT_2010

Cuda compilation tools, release 3.1, V0.2.1221

user@computer:~/Documents/cudadev$ nvcc testfft.cu -lcufft

user@computer:~/Documents/cudadev$ ./a.out

cutilCheckMsg() CUTIL CUDA error: spRadix0064B_kernel<FFT_FORWARD>(main) execution failed

in file </home/buildmeister/build/rel/gpgpu/toolkit/r3.1/cufft/src/accel/interface/spRadix0064B.cu>, line 35 : invalid configuration argument.

Another thread about a similar error mentions data alignment. However, if I remove the mean<<<>>>(), everything runs just fine (with all return values = 0)…

After more investigation, I realized I had the grid and thread sizes reversed and this code violates the 512 thread max (honest noob mistake…).

My question then becomes: How do I detect errors such as this and not get weird run-time behavior such as the above message? I realize in this case I could simply check the dims of the grid and thread arguments. But surely there’s an error noted somewhere when I launch my mean kernel…

After more investigation, I realized I had the grid and thread sizes reversed and this code violates the 512 thread max (honest noob mistake…).

My question then becomes: How do I detect errors such as this and not get weird run-time behavior such as the above message? I realize in this case I could simply check the dims of the grid and thread arguments. But surely there’s an error noted somewhere when I launch my mean kernel…