"cudaGraphExecMemcpyNodeSetParams1D" fails when using device pointer from a "cudaMallocAsync"

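In the example below, updating the parameters of a memcpy node in an executable graph with cudaGraphExecMemcpyNodeSetParams1D returns cudaErrorInvalidValue ("invalid argument") when the new device pointer was allocated with cudaMallocAsync. The same update succeeds when the device pointer comes from cudaMalloc.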

#include <cstdio>
#include <cstdlib>
#include <vector>

// Checks the status of a CUDA runtime call and reports a failure together
// with the call site. Wrapped in do/while(0) so the macro expands to exactly
// one statement and stays safe inside an unbraced if/else.
#define gpuErrchk(ans)                    \
  do {                                    \
    gpuAssert((ans), __FILE__, __LINE__); \
  } while (0)

// Reports a failed CUDA runtime call on stderr.
//
//   code  - status returned by the call being checked.
//   file  - source file of the call site (supplied by gpuErrchk).
//   line  - source line of the call site (supplied by gpuErrchk).
//   abort - when true (the default) the process exits with `code` as its exit
//           status after the message is printed; when false the error is only
//           reported and control returns to the caller.
__host__ inline void gpuAssert(cudaError_t code, const char* file, int line,
                               bool abort = true) {
  if (code != cudaSuccess) {
    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
            line);
    // Honor the abort flag: continuing after a failed call only produces
    // follow-up errors that hide the first one.
    if (abort) {
      std::exit(static_cast<int>(code));
    }
  }
}

// Minimal reproduction: builds a graph with a single 1D host-to-device memcpy
// node, instantiates it, then updates the node's parameters on the executable
// graph twice - first with a pointer obtained from cudaMalloc, then with a
// pointer obtained from cudaMallocAsync. The graph is never launched; only
// the parameter updates are exercised.
int main() {
  cudaGraph_t graph;
  gpuErrchk(cudaGraphCreate(&graph, 0));
  int* indices;
  gpuErrchk(cudaMalloc(&indices, 10 * sizeof(int)));

  std::vector<int> indices_host(10);

  // Root node (no dependencies): copy all 10 ints from the host vector into
  // the cudaMalloc'ed buffer.
  cudaGraphNode_t copy_indices_node;
  gpuErrchk(cudaGraphAddMemcpyNode1D(&copy_indices_node, graph, nullptr, 0,
                                     indices, indices_host.data(),
                                     10 * sizeof(int), cudaMemcpyHostToDevice));

  cudaGraphExec_t graphExec;
  gpuErrchk(cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));

  // Works: both operands are sub-ranges of the buffers used at instantiation
  // (device elements [1, 5), host elements [2, 6)).
  gpuErrchk(cudaGraphExecMemcpyNodeSetParams1D(
      graphExec, copy_indices_node, indices + 1, indices_host.data() + 2,
      4 * sizeof(int), cudaMemcpyHostToDevice));

  gpuErrchk(cudaFree(indices));

  void* async_indices;
  gpuErrchk(cudaMallocAsync(&async_indices, 1000, 0));

  // Reported to fail with cudaErrorInvalidValue when the destination comes
  // from cudaMallocAsync. The host source range is kept inside the 10-element
  // vector (elements [2, 10)) so an out-of-bounds host operand is ruled out
  // as the reason for the error.
  // NOTE(review): the device operand that this update replaces (indices + 1)
  // was freed a few lines above - worth confirming whether that plays a role.
  gpuErrchk(cudaDeviceSynchronize());
  gpuErrchk(cudaGraphExecMemcpyNodeSetParams1D(
      graphExec, copy_indices_node, async_indices, indices_host.data() + 2,
      8 * sizeof(int), cudaMemcpyHostToDevice));

  gpuErrchk(cudaFree(async_indices));

  // Release the graph objects instead of leaking them until process exit.
  gpuErrchk(cudaGraphExecDestroy(graphExec));
  gpuErrchk(cudaGraphDestroy(graph));

  return 0;
}

Compiled with nvcc -o example -arch=sm_61 test.cu, CUDA 12.5.

The documentation on cudaGraphExecMemcpyNodeSetParams1D states:

Returns cudaErrorInvalidValue if the memory operands’ mappings changed or the original memory operands are multidimensional.

What does “the memory operands’ mappings changed” mean? Is the mapping of memory from cudaMallocAsync different from that of memory from cudaMalloc? If not, why is this error returned? Thanks for the help!

Bump. Would really appreciate some help.