Cufft 1D can't create plan

Hi everyone,
I’m trying to create a cuFFT 1D plan, but plan creation fails.
I ran the following sample code:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>


#include <complex>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <cufft.h>

//#include "cufft_utils.h"
 // CUDA API error checking
#ifndef CUDA_RT_CALL
// Evaluates a CUDA runtime API call exactly once. When the result is not
// cudaSuccess, the call text, source line, file name, error string and numeric
// code are written to stderr. The macro only reports: execution continues
// after a failure. It expands to a braced block, so a trailing semicolon at
// the call site is optional.
#define CUDA_RT_CALL( call )                                                                                           \
    {                                                                                                                  \
        const cudaError_t rt_status = static_cast<cudaError_t>( call );                                                \
        if ( cudaSuccess != rt_status ) {                                                                              \
            fprintf( stderr,                                                                                           \
                     "ERROR: CUDA RT call \"%s\" in line %d of file %s failed with %s (%d).\n",                        \
                     #call,                                                                                            \
                     __LINE__,                                                                                         \
                     __FILE__,                                                                                         \
                     cudaGetErrorString( rt_status ),                                                                  \
                     rt_status );                                                                                      \
        }                                                                                                              \
    }
#endif  // CUDA_RT_CALL

// cuFFT API error checking
#ifndef CUFFT_CALL
// Evaluates a cuFFT API call exactly once. When the result is not
// CUFFT_SUCCESS, the call text, source line, file name and numeric result code
// are written to stderr. The macro only reports: execution continues after a
// failure. It expands to a braced block, so a trailing semicolon at the call
// site is optional.
#define CUFFT_CALL( call )                                                                                             \
    {                                                                                                                  \
        const cufftResult fft_status = static_cast<cufftResult>( call );                                               \
        if ( CUFFT_SUCCESS != fft_status ) {                                                                           \
            fprintf( stderr,                                                                                           \
                     "ERROR: CUFFT call \"%s\" in line %d of file %s failed with code (%d).\n",                        \
                     #call,                                                                                            \
                     __LINE__,                                                                                         \
                     __FILE__,                                                                                         \
                     fft_status );                                                                                     \
        }                                                                                                              \
    }
#endif  // CUFFT_CALL

/**
 * Multiplies the real (.x) and imaginary (.y) parts of each of the first
 * `element_count` entries of `data` by `scale`, in place.
 *
 * Each thread starts at its flat global index and advances by the total number
 * of launched threads (blockDim.x * gridDim.x), so any 1D launch configuration
 * covers all `element_count` elements.
 *
 * @param data           Device pointer to at least `element_count` complex values.
 * @param element_count  Number of elements to scale.
 * @param scale          Factor applied to both components of every element.
 */
__global__
void scaling_kernel(cufftComplex* data, int element_count, float scale) {
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;
    const int stride = blockDim.x * gridDim.x;
    for (auto i = tid; i < element_count; i += stride) {
        // Index with the loop variable, not tid: using tid would rescale the
        // thread's first element on every iteration and leave the elements at
        // tid + stride, tid + 2 * stride, ... untouched.
        data[i].x *= scale;
        data[i].y *= scale;
    }
}


/**
 * Runs a batched, in-place 1D complex-to-complex FFT round trip on the GPU.
 *
 * Steps: fill a host array with (i, -i), print it, copy it to the device, run a
 * forward FFT, scale by 1 / fft_size, run an inverse FFT, copy the result back
 * and print it. All device work is issued on one non-blocking stream and the
 * host waits on that stream before reading the result.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return EXIT_SUCCESS normally; EXIT_FAILURE if the cuFFT plan cannot be created.
 */
int main(int argc, char* argv[]) {
    cufftHandle plan;
    cudaStream_t stream = NULL;

    int fft_size = 8;
    int batch_size = 2;
    int element_count = batch_size * fft_size;

    using scalar_type = float;
    using data_type = std::complex<scalar_type>;

    // The host buffer is copied byte-for-byte into a cufftComplex device buffer,
    // so both element types must have the same size.
    static_assert(sizeof(data_type) == sizeof(cufftComplex),
                  "std::complex<float> and cufftComplex must have the same size");

    std::vector<data_type> data(element_count, 0);

    for (int i = 0; i < element_count; i++) {
        data[i] = data_type(i, -i);
    }

    std::printf("Input array:\n");
    for (auto& i : data) {
        std::printf("%f + %fj\n", i.real(), i.imag());
    }
    std::printf("=====\n");

    cufftComplex* d_data = nullptr;

    // cufftPlan1d creates the plan handle itself, so cufftCreate must not be
    // called first: the handle it returns would be overwritten and leaked.
    // The status is captured through the macro so the usual diagnostic is still
    // printed, and the program stops here because every later cuFFT call would
    // fail with "invalid plan" without a usable plan.
    cufftResult plan_status = CUFFT_SUCCESS;
    CUFFT_CALL(plan_status = cufftPlan1d(&plan, fft_size, CUFFT_C2C, batch_size));
    if (plan_status != CUFFT_SUCCESS) {
        return EXIT_FAILURE;
    }

    CUDA_RT_CALL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
    CUFFT_CALL(cufftSetStream(plan, stream));

    // Create device data arrays
    CUDA_RT_CALL(cudaMalloc(reinterpret_cast<void**>(&d_data), sizeof(data_type) * data.size()));
    CUDA_RT_CALL(cudaMemcpyAsync(d_data, data.data(), sizeof(data_type) * data.size(),
        cudaMemcpyHostToDevice, stream));

    /*
     * Note:
     *  Identical pointers to data and output arrays implies in-place transformation
     */
    CUFFT_CALL(cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD));

    // Normalize the data
    scaling_kernel<<<1, 128, 0, stream>>>(d_data, element_count, 1.f / fft_size);
    // A kernel launch returns no status; launch errors are only visible here.
    CUDA_RT_CALL(cudaGetLastError());

    // The original data should be recovered after Forward FFT, normalization and inverse FFT
    CUFFT_CALL(cufftExecC2C(plan, d_data, d_data, CUFFT_INVERSE));

    CUDA_RT_CALL(cudaMemcpyAsync(data.data(), d_data, sizeof(data_type) * data.size(),
        cudaMemcpyDeviceToHost, stream));

    // Wait for the stream so the host buffer is complete before it is printed.
    CUDA_RT_CALL(cudaStreamSynchronize(stream));

    std::printf("Output array after Forward FFT, Normalization, and Inverse FFT :\n");
    for (auto& i : data) {
        std::printf("%f + %fj\n", i.real(), i.imag());
    }
    std::printf("=====\n");

    /* free resources */
    CUDA_RT_CALL(cudaFree(d_data));

    CUFFT_CALL(cufftDestroy(plan));

    CUDA_RT_CALL(cudaStreamDestroy(stream));

    CUDA_RT_CALL(cudaDeviceReset());

    return EXIT_SUCCESS;
}

and got the following diagnostics:
ERROR: CUFFT call “cufftPlan1d(&plan, fft_size, CUFFT_C2C, batch_size)” in line 86 of file kernel.cu failed with code (5).
ERROR: CUFFT call “cufftSetStream(plan, stream)” in line 89 of file kernel.cu failed with code (1).
ERROR: CUFFT call “cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD)” in line 100 of file kernel.cu failed with code (1)
I can’t figure out how to resolve this error. The code works correctly on my local machine, but it does not work on the remote server. The program compiles without errors. Could someone tell me what I’m doing wrong?
I built the program using the following command:
nvcc -ccbin /usr/bin/g++-10 *cu -o run -lcufft
Yury.

When posting code on these forums, please format it correctly. A simple process for that could be:

  1. Edit your post by selecting the pencil icon below it.
  2. Select all the code in the edit window
  3. Press the </> button at the top of the edit window
  4. Save your changes.

I don’t have any issues running the code you have posted. Error code 1 from CUFFT is “invalid plan”.

That doesn’t make much sense to me, so you may have an improper CUDA setup on that remote machine.

Could you try running the CUDA samples on the same remote server and check whether they pass? See GitHub - NVIDIA/cuda-samples: Samples for CUDA Developers which demonstrates features in CUDA Toolkit