CUFFT Callback returning zeros

Hello,

I have an issue with my cufft code.

Here is the code


// Normalisation modes stored in cb_params::norm and tested by the store callback.
enum Norm {
    None = 0,      // callback applies no scaling
    Forward = 1,   // scale by 1/size when the direction is CUFFT_FORWARD
    Backward = 2,  // scale by 1/size when the direction is CUFFT_INVERSE
    Ortho = 3      // sqrt(size)-based scaling, applied regardless of direction
};

// Parameter block handed to the cuFFT store callback through its callerInfo pointer.
// The callback casts callerInfo to cb_params* and reads every field below.
typedef struct _cb_params {
    int norm;       // normalisation mode; compared against the Norm enumerators
    int direction;  // transform direction; compared against CUFFT_FORWARD / CUFFT_INVERSE
    bool shift;     // when true, the callback writes each element to a half-size-shifted index
    int size;       // transform length: used as the normalisation divisor and as the row length of the shift
} cb_params;

/**
 * cuFFT complex store callback: optionally normalises each output element and
 * optionally writes it to an fftshift-ed position.
 *
 * @param dataOut       Output buffer supplied by cuFFT (treated as cufftComplex*).
 * @param offset        Element index, in cufftComplex units, at which cuFFT would store `element`.
 * @param element       Value produced by the transform.
 * @param callerInfo    Pointer to a cb_params block. It is dereferenced here, in device code,
 *                      so it has to reference device-accessible memory.
 * @param sharedPointer Unused.
 *
 * Scaling rules:
 *   - Norm::Forward  with CUFFT_FORWARD, or Norm::Backward with CUFFT_INVERSE: multiply by 1/size.
 *   - Norm::Ortho    (either direction): multiply by 1/sqrt(size).
 *   - anything else: element is stored unscaled.
 */
static __device__ void norm_and_fftshift(void *dataOut, size_t offset, cufftComplex element, void *callerInfo,
                                         void *sharedPointer) {

    cb_params *params = (cb_params *)callerInfo;
    cufftComplex *data = (cufftComplex *)dataOut;
    int norm = params->norm;
    int direction = params->direction;
    bool shift = params->shift;
    int size = params->size;

    // Parentheses make the intended grouping of the mixed &&/|| explicit.
    if ((norm == Norm::Backward && direction == CUFFT_INVERSE) ||
        (norm == Norm::Forward && direction == CUFFT_FORWARD)) {
        float norm_factor = 1.0f / (float)size;
        element.x *= norm_factor;
        element.y *= norm_factor;
    } else if (norm == Norm::Ortho) {
        // norm_factor is already the reciprocal 1/sqrt(size), so it must be multiplied in.
        // Dividing by it (as the code did before) scaled the result UP by sqrt(size).
        float norm_factor = 1.0f / sqrtf((float)size);
        element.x *= norm_factor;
        element.y *= norm_factor;
    }

    if (shift) {
        // Interpret the flat offset as (row y, column x) of a size-by-size grid and
        // rotate both coordinates by half the length.
        // NOTE(review): for a batched 1D plan, offset / size would be the batch index rather
        // than a second transform axis, and shifting it would permute batches - confirm that
        // the shift is only enabled for square 2D data.
        int x = offset % size;
        int y = offset / size;
        int half = size / 2;
        int x_ = (x + half) % size;
        int y_ = (y + half) % size;
        int offset_ = y_ * size + x_;
        data[offset_] = element;
    } else {
        data[offset] = element;
    }
}

// Builds a batched rank-1 C2C cuFFT plan and attaches norm_and_fftshift as its store callback.
// This is an excerpt: lines marked "..." are elided, and `i`, `plan` and `polar_worksize`
// are declared in the elided parts.
void init()
{
...
    // Initialize cufftCallbacks
    // Copy the callback function pointer from the norm_and_fftshiftPtr symbol into a host
    // variable so it can be handed to cufftXtSetCallback.
    // NOTE(review): norm_and_fftshiftPtr is not declared in this excerpt; presumably it is a
    // __device__ cufftCallbackStoreC initialised with norm_and_fftshift - confirm.
    cufftCallbackStoreC h_storeCallbackPtr;
    checkCudaErrors(
            cudaMemcpyFromSymbol(&h_storeCallbackPtr, norm_and_fftshiftPtr, sizeof(h_storeCallbackPtr)));
    
    int rank = 1;             // passed as the plan rank: one transform dimension
    int batch_size = 2;       // number of transforms in the batch
    int n[] = {4 * (i + 1)};  // transform length; depends on `i` from the elided code
    int inembed[] = {0};      // Input storage dimensions; for rank 1 the value is effectively ignored,
                              // but a non-NULL pointer is what enables the stride/dist arguments
    int istride = 1;          // Distance between consecutive elements in the same batch always 1 since we
                                    // have contiguous data
    int idist = 1;  // Distance between the first elements of two consecutive input batches.
                                    // NOTE(review): with idist = 1 the two batches start one element apart
                                    // and therefore overlap; the original remark said it should equal the
                                    // distance between the two rings - confirm the intended value.
    int onembed[] = {0};            // Output storage dimensions (value effectively ignored for rank 1, but has to be set)
    int ostride = 1;  // Distance between consecutive elements in the output batch, also 1 since
                            // everything is done in place
    int odist = 1; // Distance between consecutive output batches; same overlap caveat as idist
    
    CUFFT_CALL(cufftMakePlanMany(plan, rank, n, inembed, istride, idist, onembed, ostride, odist,
                                    CUFFT_C2C, batch_size, &polar_worksize));
    
    // Create the cb_params
    // NOTE(review): `params` is a local host object. Its address is passed below as callerInfo
    // and is dereferenced inside the device callback, so it is not valid there; the block has
    // to be copied to device memory (cudaMalloc + cudaMemcpy) and the device pointer passed instead.
    cb_params params;
    params.norm = Norm::None;
    params.direction = CUFFT_FORWARD;
    params.shift = false;
    params.size = n[0];
    // Set the callback
    CUFFT_CALL(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX,
                                    (void **)&params));
...
}

When I use the callbacks, I get zeros.
Also, is there a way to get the direction of the FFT inside the callback (without setting it in the cb_params)?

Thank you.

I have pinpointed the issue.

It is when I try to access my cb params

The kernel returns.

// Debugging variant of the store callback: it loads every cb_params field through
// callerInfo, then stores the element at its original offset without modifying it.
static __device__ void norm_and_fftshift(void *dataOut, size_t offset, cufftComplex element, void *callerInfo,
                                         void *sharedPointer) {
    cufftComplex *out = static_cast<cufftComplex *>(dataOut);
    cb_params *info = static_cast<cb_params *>(callerInfo);

    // out[offset] = cufftComplex{0.0f, 0.0f};

    // The loaded values are only consumed by the (disabled) printf calls below.
    int norm = info->norm;
    int direction = info->direction;
    bool shift = info->shift;
    int size = info->size;

    // printf("norm: %d, ", norm);
    // printf("direction: %d, ", direction);
    // printf("shift: %d, ", shift);
    // printf("size: %d\n", size);

    // Pass-through store.
    out[offset] = element;
}

This runs … but I need the parameters

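Found the issue.

The params need to be copied to the device: the callback dereferences `callerInfo` in device code, so the pointer given to `cufftXtSetCallback` must point to device memory, not to a host variable.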


        // Create the cb_params on the host. A plain local is enough: it is only the
        // source of the copy below, so there is nothing to heap-allocate (or leak).
        cb_params params;
        params.norm = Norm::None;
        params.direction = CUFFT_FORWARD;
        params.shift = false;
        params.size = n[0];

        // The callback dereferences callerInfo in device code, so the parameter block
        // has to live in device memory. Check both calls: a failed allocation or copy
        // would otherwise only show up later as garbage output.
        cb_params* dev_params = nullptr;
        checkCudaErrors(cudaMalloc(&dev_params, sizeof(cb_params)));
        checkCudaErrors(cudaMemcpy(dev_params, &params, sizeof(cb_params), cudaMemcpyHostToDevice));

        // Set the callback, passing the DEVICE pointer as callerInfo.
        CUFFT_CALL(cufftXtSetCallback(plan, (void **)&h_storeCallbackPtr, CUFFT_CB_ST_COMPLEX,
                                      (void **)&dev_params));
        // dev_params must remain allocated for as long as the plan can be executed;
        // release it with cudaFree once the plan has been destroyed.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.