What is the correct way to copy a cufftHandle?

You should always check the return values of CUDA API calls and cuFFT API calls. In case 2, fft_execute_forward does not fail silently: with correct error checking you would see that the return value is CUFFT_INVALID_PLAN.

There are a few options to work around your issue, but note that the issue is not specific to cuFFT.

Option 1: Delete the copy constructor and copy assignment operator, and use a move constructor and move assignment operator instead.
Option 2: Create a new plan when making a copy.

Option 2 could look like this:

#include <cassert>
#include <cstdlib>
#include <iostream>
#include <utility>

#include "cufft.h"
#include "thrust/complex.h"

/**
 * Creates a cuFFT plan for a batch of 1D complex-to-complex (CUFFT_C2C)
 * transforms.
 *
 * The layout described to cuFFT is the same for input and output: Nbatch
 * signals of Nx elements each, elements of one signal stored contiguously
 * (stride 1) and consecutive signals Nx elements apart (no padding).
 *
 * The cufftPlanMany status is always printed to std::cerr. If plan creation
 * fails the process is aborted; unlike an assert, this check stays active
 * when NDEBUG is defined, so an uninitialised handle is never returned.
 *
 * @param Nbatch number of signals transformed per execution
 * @param Nx     number of elements in each signal
 * @return the handle of the newly created plan
 */
cufftHandle plan_batched_1d_ffts(int Nbatch, int Nx) {
  int dimN[1] = {Nx};  // signal's size (number of spatial points)

  int inembed[1] = {Nx};  // storage is same as dimN - no-padding!
  int onembed[1] = {Nx};

  int inputStride = 1;  // dist. between successive input elements
  int outputStride = inputStride;

  int inputDist = Nx;  // dist. between 1st elem. in successive input signals
  int outputDist = inputDist;

  cufftHandle plan;

  const cufftResult status =
      cufftPlanMany(&plan, 1, dimN, inembed, inputStride, inputDist, onembed,
                    outputStride, outputDist, CUFFT_C2C, Nbatch);
  std::cerr << "cufftPlanMany status = " << status << "\n";

  // Explicit check instead of assert(): assert() is compiled out under
  // NDEBUG, which would let a failed call hand back an uninitialised handle.
  if (status != CUFFT_SUCCESS) {
    std::cerr << "plan_batched_1d_ffts: cufftPlanMany failed\n";
    std::abort();
  }

  return plan;
}

/**
 * Runs a forward complex-to-complex FFT with the given plan.
 *
 * The same pointer is passed to cuFFT as input and output, so the transform
 * is performed in place on `data`.
 *
 * The cufftExecC2C status is always printed to std::cerr. On failure (for
 * example CUFFT_INVALID_PLAN when the plan has already been destroyed) the
 * process is aborted; the check stays active when NDEBUG is defined.
 *
 * @param data   buffer handed to cuFFT as both input and output
 * @param handle pointer to the plan to execute; must not be null
 */
void fft_execute_forward(thrust::complex<float> *data, cufftHandle *handle) {
  // Dereferencing a null handle pointer would be undefined behaviour.
  if (handle == nullptr) {
    std::cerr << "fft_execute_forward: handle pointer is null\n";
    std::abort();
  }

  cufftComplex *buffer = reinterpret_cast<cufftComplex *>(data);

  const cufftResult status = cufftExecC2C(*handle, buffer, buffer, CUFFT_FORWARD);
  std::cerr << "cufftExecC2C status = " << status << "\n";

  // Explicit check instead of assert(): assert() is compiled out under
  // NDEBUG, which would silently ignore a failed transform.
  if (status != CUFFT_SUCCESS) {
    std::cerr << "fft_execute_forward: cufftExecC2C failed\n";
    std::abort();
  }
}

// Bundles a device buffer of _Nbatch * _Nx complex samples with the cuFFT
// plan created for that buffer's dimensions. Copying is user-defined: the
// copy constructor and copy assignment operator are declared here and
// implemented out of line, as is the destructor.
class A {
 public:
  // cuFFT plan handle; set by A(int, int), value-initialized otherwise.
  cufftHandle _handle{};
  // Number of signals in the batch (0 for a default-constructed object).
  int _Nbatch{};
  // Number of elements per signal (0 for a default-constructed object).
  int _Nx{};
  // Buffer obtained from cudaMalloc in A(int, int); nullptr for a
  // default-constructed object.
  thrust::complex<float> *_data{};

  // Creates an empty object: no buffer is allocated and no plan is created.
  A() = default;
  // Allocates the buffer and creates the plan for Nbatch signals of Nx elements.
  A(int Nbatch, int Nx);
  // Copy constructor (implemented out of line).
  A(const A &other);
  // Copy assignment operator (implemented out of line).
  A &operator=(const A &other);
  // Releases the buffer and the plan.
  ~A();
};

/* Constructor: allocates a device buffer for Nbatch * Nx complex samples and
 * creates the matching batched 1D cuFFT plan.
 *
 * A failed allocation aborts the process. The check is explicit rather than
 * an assert(), so it stays active when NDEBUG is defined. */
A::A(int Nbatch, int Nx) : _Nbatch(Nbatch), _Nx(Nx) {
  const cudaError_t status =
      cudaMalloc((void **)&_data, sizeof(thrust::complex<float>) * _Nbatch * _Nx);
  if (status != cudaSuccess) {
    std::cerr << "A::A: cudaMalloc failed: " << cudaGetErrorString(status) << "\n";
    std::abort();
  }

  _handle = plan_batched_1d_ffts(_Nbatch, _Nx);
}

/* Copy constructor: delegates to A(int, int) so that the copy gets its own
 * buffer and its own plan, then copies the samples device-to-device.
 *
 * A failed copy aborts the process. The check is explicit rather than an
 * assert(), so it stays active when NDEBUG is defined.
 *
 * NOTE(review): copying a default-constructed (empty) object delegates to
 * A(0, 0), which requests a zero-sized plan - confirm whether cuFFT rejects
 * that; if it does, empty objects cannot be copied. */
A::A(const A &other) : A(other._Nbatch, other._Nx) {
  const cudaError_t status =
      cudaMemcpy(_data, other._data, sizeof(thrust::complex<float>) * _Nbatch * _Nx,
                 cudaMemcpyDeviceToDevice);
  if (status != cudaSuccess) {
    std::cerr << "A::A(const A &): cudaMemcpy failed: " << cudaGetErrorString(status)
              << "\n";
    std::abort();
  }
}

/* Copy assignment, implemented as copy-and-swap: build an independent copy
 * of `other`, exchange all members with it, and let the temporary's
 * destructor release what *this held before. */
A &A::operator=(const A &other) {
  if (this == &other) {
    return *this;  // self-assignment: nothing to do
  }

  // The copy constructor gives the temporary its own data and its own plan.
  A replacement(other);

  // After these swaps *this holds the fresh resources and the temporary
  // holds the previous ones.
  std::swap(_handle, replacement._handle);
  std::swap(_Nbatch, replacement._Nbatch);
  std::swap(_Nx, replacement._Nx);
  std::swap(_data, replacement._data);

  // The previous plan and data are cleaned up when `replacement` is destroyed.
  return *this;
}

/* Destructor: frees the data buffer first, then destroys the cuFFT plan.
 * Both status codes are discarded.
 *
 * NOTE(review): a default-constructed object reaches this point with a
 * value-initialized _handle for which no plan was created - confirm that
 * calling cufftDestroy on it is harmless. */
A::~A() {
  static_cast<void>(cudaFree(_data));
  static_cast<void>(cufftDestroy(_handle));
}



#if 0

int main() {
  const int Nbatch = 10;
  const int Nx = 5;

  A obj1(Nbatch, Nx); /* create obj1 using constructor */
  A obj2;
  obj2 = obj1; /*  obj2 is a copy of obj1 */

  /* perform a forward FFT on the data of obj2 */
  fft_execute_forward(obj2._data, &obj2._handle);

  return 0;
}

#else

int main() {
  const int Nbatch = 10;
  const int Nx = 5;

  A obj2;

  {
    A obj1(Nbatch, Nx);
    obj2 = obj1;
  } /* here obj1 goes out of scope */

  fft_execute_forward(obj2._data, &obj2._handle);

  return 0;
}

#endif