Hi,
I am trying to use callback functions, and they seem to not be working. I am running this code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cufft.h>
#include <cufftXt.h>
#include <helper_cuda.h>
// basic callback function
static __device__ void our_store(void *data, size_t index, cufftDoubleComplex v, void *cInfo, void *shared) {
((cufftDoubleComplex*)data)[index].x = 0.3;
((cufftDoubleComplex*)data)[index].y = 0.3;
}
__device__ cufftCallbackStoreZ callbackPtr = our_store;
int main() {
int version;
cufftGetVersion(&version);
printf("DFT %d using cuFFT v%d\n", N, version);
cufftCallbackStoreZ hostCopyOfStore;
checkCudaErrors( cudaMemcpyFromSymbol(&hostCopyOfStore, callbackPtr, sizeof(hostCopyOfStore)) );
cufftDoubleComplex *data_in, *data_out;
checkCudaErrors( cudaMalloc((void**)&data_in, sizeof(cufftDoubleComplex)*N) );
checkCudaErrors( cudaMalloc((void**)&data_out, sizeof(cufftDoubleComplex)*N) );
cufftDoubleComplex *in, *out;
checkCudaErrors( cudaMallocHost((void**)&in, sizeof(cufftDoubleComplex)*N) );
checkCudaErrors( cudaMallocHost((void**)&out, sizeof(cufftDoubleComplex)*N) );
int dims[1] = {N};
for (int i = 0; i < N; i++) {
//printf("%d\n", i);
in[i].x = i;
in[i].y = 0;
}
checkCudaErrors( cudaMemcpy(data_in, in, N*sizeof(cufftDoubleComplex), cudaMemcpyHostToDevice) );
cufftHandle plan;
cufftCreate(&plan);
size_t workSize;
//if (cufftPlanMany(&plan, 1, dims, NULL, 1, N, NULL, 1, N, CUFFT_Z2Z, 1) != CUFFT_SUCCESS) return -1;
if (cufftMakePlanMany(plan, 1, dims, NULL, 1, N, NULL, 1, N, CUFFT_Z2Z, 1, &workSize) != CUFFT_SUCCESS) return -1;
checkCudaErrors( cufftXtSetCallback(plan, (void**)&hostCopyOfStore, CUFFT_CB_ST_COMPLEX_DOUBLE, NULL) );
if (cufftExecZ2Z(plan, data_in, data_out, CUFFT_FORWARD) != CUFFT_SUCCESS) return -1;
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors( cudaMemcpy(out, data_out, N*sizeof(cufftDoubleComplex), cudaMemcpyDeviceToHost) );
for (int i = 0; i < N; i++) printf("(%f %f)\n", out[i].x, out[i].y);
and my output is
DFT 4 using cuFFT v10000
(6.000000 0.000000)
(-2.000000 2.000000)
(-2.000000 0.000000)
(-2.000000 -2.000000)
but I am expecting all of the results to be 0.3 based on the callback function.
The Makefile I am using is
DEFINES = -DN=4
CFLAGS = -m64 -I/usr/local/cuda-10.0/samples/common/inc
LIBS = -lcufft_static -lculibos
all: test
test: test.cu
nvcc -o $@ $(CFLAGS) -DN=4 $^ $(LIBS)
I am running on nvcc V10.0.130 and ubuntu 16.04 and the GPU is a Titan V. Any help as to why I am running into this issue would be greatly appreciated.