cuPointerSetAttribute error(CUDA_ERROR_NOT_SUPPORTED) with CUDA virtual memory management API

Hi,
I use the CUDA low-level virtual memory management API to allocate device memory, and then I want to synchronize memory operations initiated on this region, but I get the error "cuPointerSetAttribute error(CUDA_ERROR_NOT_SUPPORTED)".
Here is my code.

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <cuda.h>
int main() {
    int currentDevice = 0;
    cudaSetDevice(currentDevice);
    cudaFree(0);

    int deviceSupportsVmm;
    CUresult r = cuDeviceGetAttribute(&deviceSupportsVmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, 0);
    if(deviceSupportsVmm != 0) {
        printf("device support Virtual Memory Management\n");
    }

    size_t size = 2 * 1024 * 1024;
    CUmemAllocationProp prop = {};
    memset(&prop,0,sizeof(prop));

    size_t granularity = 0;
    CUresult result;
    // Calculates either the minimal or recommended granularity.
    result = cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
    if (result != CUDA_SUCCESS) {
        printf("cudaErrorMemoryAllocation error %d\n",result);
        return cudaErrorMemoryAllocation;
    }

    size = ((size - 1) / granularity + 1) * granularity;

    CUmemGenericAllocationHandle allocationHandle;
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = currentDevice;
    prop.allocFlags.gpuDirectRDMACapable = 1;
    // Create a CUDA memory handle representing a memory allocation of a given size described by the given properties.
    result = cuMemCreate(&allocationHandle, size, &prop, 0);
    if (result != CUDA_SUCCESS) {
        printf("cuMemCreate error %d\n",result);
        return cudaErrorMemoryAllocation;
    }

    CUdeviceptr dptr;
    // Allocate an address range reservation.
    result = cuMemAddressReserve(&dptr, size, 0, 0, 0);
    if (result != CUDA_SUCCESS) {
        printf("cuMemAddressReserve error %d\n",result);
        return cudaErrorMemoryAllocation;
    }
    printf("dptr = %p\n",dptr);
    
    // Maps an allocation handle to a reserved virtual address range.
    // cuMemMap can only create mappings on VA range reservations that are not currently mapped.
    result = cuMemMap(dptr, size, 0, allocationHandle, 0);
    if (result != CUDA_SUCCESS) {
        printf("cuMemMap error %d\n",result);
        return cudaErrorMemoryAllocation;
    }

    CUmemAccessDesc accessDescriptor;
    accessDescriptor.location.id   = prop.location.id;
    accessDescriptor.location.type = prop.location.type;
    accessDescriptor.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    // Set the access flags for each location specified in desc for the given virtual address range.
    // Any new mapping to this virtual address will need to have access granted through cuMemSetAccess, as all mappings start with no accessibility setup.
    result = cuMemSetAccess(dptr, size, &accessDescriptor, 1);
    if (result != CUDA_SUCCESS) {
        printf("cuMemSetAccess error %d\n",result);
        return cudaErrorMemoryAllocation;
    }

    CUmemorytype type;
    CUresult err = cuPointerGetAttribute(&type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,dptr);
    if(err != CUDA_SUCCESS) {
        printf("cuPointerGetAttribute error %d\n",err);
    }
    else {
        if(type == CU_MEMORYTYPE_DEVICE) {
            printf("pointer(%p) addresses device memory\n",dptr);
        }
    }

    uint flags = 1;
    err = cuPointerSetAttribute(&flags, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,dptr);
    if(err != CUDA_SUCCESS) {
        printf("cuPointerSetAttribute error %d\n",err);
    }

    return cudaSuccess;
}

The running results are as follows, where error 801 is CUDA_ERROR_NOT_SUPPORTED.
image

What CUDA version are you using?
What GPU are you running on?
What OS?

CUDA version: CUDA 11.7
GPU: NVIDIA Tesla V100-PCIE-32GB
OS: Ubuntu 20.04.6

It seems to be not supported (for a VMM API allocated region). try using cuMemAlloc (or cudaMalloc) instead.

Thanks for your good suggestion. But I want to implement GPUDirect RDMA with CUDA virtual memory management API.
I have read the topic Introducing Low-Level GPU Virtual Memory Management, and I found that the low-level virtual memory APIs do support GPUDirect RDMA. Since GPUDirect RDMA requires the CU_POINTER_ATTRIBUTE_SYNC_MEMOPS attribute on the memory region, I tried to set it with cuPointerSetAttribute, but the call failed.