Question about cudaPointerGetAttributes in uvm

Here is my code.

#include <stdio.h>
#include <cuda_runtime.h>

#define CHECK_CUDA(call) \
do { \
    cudaError_t err = call; \
    if (err != cudaSuccess) { \
        fprintf(stderr, "CUDA error at %s:%d code=%d(%s)\n", \
                __FILE__, __LINE__, err, cudaGetErrorString(err)); \
        exit(EXIT_FAILURE); \
    } \
} while (0)

void check_memory_location(const void* ptr) {
    cudaPointerAttributes attributes;
    CHECK_CUDA(cudaPointerGetAttributes(&attributes, ptr));
    
    switch (attributes.type) {
        case cudaMemoryTypeUnregistered:
            printf("pointer %p: unregister memory\n", ptr);
            break;
        case cudaMemoryTypeHost:
            printf("pointer %p: host memory %d\n", ptr, attributes.device);
            break;
        case cudaMemoryTypeDevice:
            printf("pointer %p: device memory (device %d)\n", 
                  ptr, attributes.device);
            break;
        case cudaMemoryTypeManaged:
            printf("pointer %p: united memory (location: %s)\n", 
                  ptr, attributes.device == -1 ? "host" : "device");
            break;
        default:
            printf("invalid\n");
    }
}

int main() {
    const size_t size = 1024 * sizeof(float);
    
    float* d_data;
    CHECK_CUDA(cudaMalloc(&d_data, size));
    check_memory_location(d_data);
    
    float* h_data = (float*)malloc(size);
    check_memory_location(h_data);
    
    float* pinned_data;
    CHECK_CUDA(cudaMallocHost(&pinned_data, size));
    check_memory_location(pinned_data);
    
    float* uvm_data;
    CHECK_CUDA(cudaMallocManaged(&uvm_data, size));
    check_memory_location(uvm_data);
    
    CHECK_CUDA(cudaMemPrefetchAsync(uvm_data, size, 0));
    CHECK_CUDA(cudaDeviceSynchronize());
    check_memory_location(uvm_data);
    
    CHECK_CUDA(cudaMemPrefetchAsync(uvm_data, size, cudaCpuDeviceId));
    CHECK_CUDA(cudaDeviceSynchronize());
    check_memory_location(uvm_data);
    
    CHECK_CUDA(cudaFree(d_data));
    free(h_data);
    CHECK_CUDA(cudaFreeHost(pinned_data));
    CHECK_CUDA(cudaFree(uvm_data));
    
    return 0;
}

output:

pointer 0x7f7736800000: device memory (device 0)
pointer 0x55b5df608ae0: unregister memory
pointer 0x7f7736a00000: host memory 0
pointer 0x7f772c000000: united memory (location: device)
pointer 0x7f772c000000: united memory (location: device)
pointer 0x7f772c000000: united memory (location: device)

I have some misunderstanding about uvm. After I Prefetch uvm memory to CPU, why memory location is still in device.

  • If ptr has memory type cudaMemoryTypeDevice then this identifies the device on which the memory referred to by ptr physically resides. If ptr has memory type cudaMemoryTypeHost then this identifies the device which was current when the allocation was made (and if that device is deinitialized then this allocation will vanish with that device’s state).

The nvidia doc about paramdevice is only cudaMemoryTypeHost and cudaMemoryTypeDevice, not involved cudaMemoryTypeManaged. So when using united memory, how to distinguish if memory is in device or host.

The pointer introspection does not tell you where the data is located. In a demand-paged environment, some of the pages could be located in host memory and some of the pages could be located in device memory.

There is no way that I know of to identify the location of a particular UM page, or of an entire UM allocation.

The pointer introspection refers to the UM allocation as “Device” because it is accessible from Device code.

1 Like

Got it. Thanks for you reply.