cuFFTMp plan failure

I am unable to create a plan for a 2D R2C/C2R transform with cuFFTMp. The relevant part of the code is:

    // Excerpt: initialise MPI, split a 1024x1024 grid across ranks along X,
    // and try to build the distributed R2C plan with an explicit per-rank
    // decomposition. MPICHECK / CUFFT_CHECK are error-checking macros defined
    // outside this excerpt.
    MPICHECK(MPI_Init(&argc,&argv));
    // p = number of processes in MPI_COMM_WORLD, rank = index of this process.
    int p, rank;
    MPICHECK(MPI_Comm_size(MPI_COMM_WORLD,&p));
    MPICHECK(MPI_Comm_rank(MPI_COMM_WORLD,&rank));
    // NYh = NY/2+1 is used below as the Y extent of the complex output box.
    const int NX=1024, NY=1024, NYh=NY/2+1;
    // Integer division: when p does not divide NX, the per-rank boxes below
    // stop at p*local_nx and leave the last NX % p rows unassigned.
    // NOTE(review): confirm p divides NX for the configurations being run.
    int local_nx = NX / p;
    MPI_Comm mpi_comm = MPI_COMM_WORLD;


    // Both handles are created here; only plan_r2c is planned in this excerpt.
    cufftHandle plan_r2c, plan_c2r;
    CUFFT_CHECK(cufftCreate(&plan_r2c));
    CUFFT_CHECK(cufftCreate(&plan_c2r));
    // Global dims: X then Y
    // NOTE(review): the box arrays below are long long while n2 is int —
    // confirm the element type cufftMpMakePlanDecomposition expects for the
    // dims argument against the installed cufftMp.h.
    int n2[2] = { NX, NY };

    // --- Build R2C plan (real -> complex) ---
    // Input box for this rank: X from rank*local_nx to (rank+1)*local_nx,
    // Y from 0 to NY, with strides { NY, 1 }.
    long long in_lo_r2c[2]   = { (long long)rank*local_nx,       0 };
    long long in_hi_r2c[2]   = { (long long)(rank+1)*local_nx,  NY };
    long long in_str_r2c[2]  = {        NY,                     1 };
    // Output box: same X range, Y from 0 to NYh, with strides { NYh, 1 }.
    long long out_lo_r2c[2]  = { (long long)rank*local_nx,       0 };
    long long out_hi_r2c[2]  = { (long long)(rank+1)*local_nx,  NYh };
    long long out_str_r2c[2] = {        NYh,                    1 };

    // The result is stored in r_r2c rather than wrapped in CUFFT_CHECK; it is
    // not inspected within this excerpt.
    size_t work_r2c;
    cufftResult r_r2c = cufftMpMakePlanDecomposition(
        plan_r2c, 2, n2,
        in_lo_r2c, in_hi_r2c, in_str_r2c,
        out_lo_r2c, out_hi_r2c, out_str_r2c,
        CUFFT_R2C, &mpi_comm, CUFFT_COMM_MPI, &work_r2c
    );

I also wrote a complete minimal program that reproduces the error:

#include <mpi.h>
#include <cufftMp.h>
#include <iostream>
#include <string>

/// Minimal reproducer: creates a cuFFT handle, asks cuFFTMp to build a
/// distributed 32x16 C2C plan over MPI_COMM_WORLD, and reports the result.
///
/// @param argc Argument count, forwarded to MPI_Init.
/// @param argv Argument vector, forwarded to MPI_Init.
/// @return 0 if cufftMpMakePlan2d succeeded on this rank, 1 otherwise.
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm comm = MPI_COMM_WORLD;

    // NOTE(review): no GPU is selected before the plan is created. NVIDIA's
    // cuFFTMp samples call cudaSetDevice per rank first — confirm whether the
    // missing device selection contributes to the failure being reproduced.
    cufftHandle plan;
    cufftResult r = cufftCreate(&plan);
    if (r != CUFFT_SUCCESS) {
        // Report from whichever rank failed: a failure confined to a
        // non-zero rank would otherwise abort the job with no message.
        std::cerr << "cufftCreate failed: " << r
                  << " (rank " << rank << ")\n";
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Zero-initialised so the variable never holds an indeterminate value,
    // even when planning fails and the library does not write to it.
    size_t workSize = 0;
    r = cufftMpMakePlan2d(
        plan,
        /*nx=*/32, /*ny=*/16,
        CUFFT_C2C,
        &comm, CUFFT_COMM_MPI,
        &workSize
    );

    // Rank 0 prints the summary line below; other ranks only speak up on
    // failure so per-rank errors are not silently lost.
    if (rank != 0 && r != CUFFT_SUCCESS) {
        std::cerr << "cufftMpMakePlan2d returned: " << r
                  << " (rank " << rank << ")\n";
    }

    if (rank == 0) {
        // Build the optional suffix as a std::string up front instead of
        // mixing std::string and const char* in a conditional expression.
        std::string suffix;
        if (r == CUFFT_SUCCESS) {
            suffix = ", workSize=" + std::to_string(workSize);
        }
        std::cout << "cufftMpMakePlan2d returned: " << r << suffix << "\n";
    }

    cufftDestroy(plan);
    MPI_Finalize();
    return (r == CUFFT_SUCCESS ? 0 : 1);
}

Both of these fail with error code 1. Any explanation of why this is occurring and how to fix it would be helpful. Thanks!