Here is my code.
#include <stdio.h>
#include <cuda_runtime.h>
#define CHECK_CUDA(call) \
do { \
cudaError_t err = call; \
if (err != cudaSuccess) { \
fprintf(stderr, "CUDA error at %s:%d code=%d(%s)\n", \
__FILE__, __LINE__, err, cudaGetErrorString(err)); \
exit(EXIT_FAILURE); \
} \
} while (0)
void check_memory_location(const void* ptr) {
cudaPointerAttributes attributes;
CHECK_CUDA(cudaPointerGetAttributes(&attributes, ptr));
switch (attributes.type) {
case cudaMemoryTypeUnregistered:
printf("pointer %p: unregister memory\n", ptr);
break;
case cudaMemoryTypeHost:
printf("pointer %p: host memory %d\n", ptr, attributes.device);
break;
case cudaMemoryTypeDevice:
printf("pointer %p: device memory (device %d)\n",
ptr, attributes.device);
break;
case cudaMemoryTypeManaged:
printf("pointer %p: united memory (location: %s)\n",
ptr, attributes.device == -1 ? "host" : "device");
break;
default:
printf("invalid\n");
}
}
int main() {
const size_t size = 1024 * sizeof(float);
float* d_data;
CHECK_CUDA(cudaMalloc(&d_data, size));
check_memory_location(d_data);
float* h_data = (float*)malloc(size);
check_memory_location(h_data);
float* pinned_data;
CHECK_CUDA(cudaMallocHost(&pinned_data, size));
check_memory_location(pinned_data);
float* uvm_data;
CHECK_CUDA(cudaMallocManaged(&uvm_data, size));
check_memory_location(uvm_data);
CHECK_CUDA(cudaMemPrefetchAsync(uvm_data, size, 0));
CHECK_CUDA(cudaDeviceSynchronize());
check_memory_location(uvm_data);
CHECK_CUDA(cudaMemPrefetchAsync(uvm_data, size, cudaCpuDeviceId));
CHECK_CUDA(cudaDeviceSynchronize());
check_memory_location(uvm_data);
CHECK_CUDA(cudaFree(d_data));
free(h_data);
CHECK_CUDA(cudaFreeHost(pinned_data));
CHECK_CUDA(cudaFree(uvm_data));
return 0;
}
output:
pointer 0x7f7736800000: device memory (device 0)
pointer 0x55b5df608ae0: unregister memory
pointer 0x7f7736a00000: host memory 0
pointer 0x7f772c000000: united memory (location: device)
pointer 0x7f772c000000: united memory (location: device)
pointer 0x7f772c000000: united memory (location: device)
I have some misunderstanding about uvm. After I Prefetch uvm memory to CPU, why memory location is still in device.
- If ptr has memory type cudaMemoryTypeDevice then this identifies the device on which the memory referred to by ptr physically resides. If ptr has memory type cudaMemoryTypeHost then this identifies the device which was current when the allocation was made (and if that device is deinitialized then this allocation will vanish with that device’s state).
The nvidia doc about paramdevice
is only cudaMemoryTypeHost and cudaMemoryTypeDevice, not involved cudaMemoryTypeManaged. So when using united memory, how to distinguish if memory is in device or host.