In this example, changing the node parameters of a memcpy node on an executable graph returns an invalid value error when the device pointer comes from cudaMallocAsync.
#include <cstdio>
#include <cstdlib>
#include <vector>
// Wraps a CUDA runtime call and reports any failure together with the call
// site. The do/while(0) form makes the macro expand to a single statement, so
// it stays correct inside an unbraced if/else.
#define gpuErrchk(ans)                    \
  do {                                    \
    gpuAssert((ans), __FILE__, __LINE__); \
  } while (0)

// Checks the status returned by a CUDA runtime call.
//   code  - status returned by the call
//   file  - source file of the call site
//   line  - source line of the call site
//   abort - when true (the default), terminate the process on failure
// On failure the error string and call site are written to stderr; if `abort`
// is set the process then exits with `code` as its exit status.
__host__ inline void gpuAssert(cudaError_t code, const char* file, int line,
                               bool abort = true) {
  if (code != cudaSuccess) {
    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
            line);
    if (abort) {
      std::exit(static_cast<int>(code));
    }
  }
}
// Reproduction program: builds a graph holding a single 1D host-to-device
// memcpy node, instantiates it, and then updates that node's parameters on the
// executable graph twice -- first with a destination inside a cudaMalloc
// allocation, then with a destination obtained from cudaMallocAsync.
// Every CUDA runtime call is routed through gpuErrchk.
int main() {
  cudaGraph_t graph;
  gpuErrchk(cudaGraphCreate(&graph, 0));

  // Device destination and host source for the original memcpy node.
  int* indices;
  gpuErrchk(cudaMalloc(&indices, 10 * sizeof(int)));
  std::vector<int> indices_host(10);

  // Root node (no dependencies): copy all 10 ints host -> device.
  cudaGraphNode_t copy_indices_node;
  gpuErrchk(cudaGraphAddMemcpyNode1D(&copy_indices_node, graph, nullptr, 0,
                                     indices, indices_host.data(),
                                     10 * sizeof(int), cudaMemcpyHostToDevice));

  cudaGraphExec_t graphExec;
  gpuErrchk(cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));

  // Reported to work: new destination is still inside the cudaMalloc buffer.
  gpuErrchk(cudaGraphExecMemcpyNodeSetParams1D(
      graphExec, copy_indices_node, indices + 1, indices_host.data() + 2,
      4 * sizeof(int), cudaMemcpyHostToDevice));

  gpuErrchk(cudaFree(indices));

  // Stream-ordered allocation on stream 0; synchronize so the allocation has
  // completed before its pointer is handed to the graph update.
  void* async_indices;
  gpuErrchk(cudaMallocAsync(&async_indices, 1000, 0));
  gpuErrchk(cudaDeviceSynchronize());

  // Reported to fail with an invalid value error: destination comes from
  // cudaMallocAsync. The host source starts at offset 2 so that the 8-int
  // range stays inside the 10-element vector.
  gpuErrchk(cudaGraphExecMemcpyNodeSetParams1D(
      graphExec, copy_indices_node, async_indices, indices_host.data() + 2,
      8 * sizeof(int), cudaMemcpyHostToDevice));

  gpuErrchk(cudaFree(async_indices));

  // Release the executable graph and the graph template.
  gpuErrchk(cudaGraphExecDestroy(graphExec));
  gpuErrchk(cudaGraphDestroy(graph));
  return 0;
}
Compiled with `nvcc -o example -arch=sm_61 test.cu`, using CUDA 12.5.
The documentation on cudaGraphExecMemcpyNodeSetParams1D states:
> Returns cudaErrorInvalidValue if the memory operands’ mappings changed or the original memory operands are multidimensional.
What does “the memory operands’ mappings changed” mean? Is the mapping of memory from cudaMallocAsync different from that of cudaMalloc? If not, why is this error returned? Thanks for the help!