Hi,
I use the CUDA low-level virtual memory management API to allocate device memory, and then I want to synchronize the memory operations initiated on this region, but I get the error "cuPointerSetAttribute error (CUDA_ERROR_NOT_SUPPORTED)".
Here is my code.
#include <cstddef>
#include <cstdio>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>
int main() {
int currentDevice = 0;
cudaSetDevice(currentDevice);
cudaFree(0);
int deviceSupportsVmm;
CUresult r = cuDeviceGetAttribute(&deviceSupportsVmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, 0);
if(deviceSupportsVmm != 0) {
printf("device support Virtual Memory Management\n");
}
size_t size = 2 * 1024 * 1024;
CUmemAllocationProp prop = {};
memset(&prop,0,sizeof(prop));
size_t granularity = 0;
CUresult result;
// Calculates either the minimal or recommended granularity.
result = cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
if (result != CUDA_SUCCESS) {
printf("cudaErrorMemoryAllocation error %d\n",result);
return cudaErrorMemoryAllocation;
}
size = ((size - 1) / granularity + 1) * granularity;
CUmemGenericAllocationHandle allocationHandle;
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = currentDevice;
prop.allocFlags.gpuDirectRDMACapable = 1;
// Create a CUDA memory handle representing a memory allocation of a given size described by the given properties.
result = cuMemCreate(&allocationHandle, size, &prop, 0);
if (result != CUDA_SUCCESS) {
printf("cuMemCreate error %d\n",result);
return cudaErrorMemoryAllocation;
}
CUdeviceptr dptr;
// Allocate an address range reservation.
result = cuMemAddressReserve(&dptr, size, 0, 0, 0);
if (result != CUDA_SUCCESS) {
printf("cuMemAddressReserve error %d\n",result);
return cudaErrorMemoryAllocation;
}
printf("dptr = %p\n",dptr);
// Maps an allocation handle to a reserved virtual address range.
// cuMemMap can only create mappings on VA range reservations that are not currently mapped.
result = cuMemMap(dptr, size, 0, allocationHandle, 0);
if (result != CUDA_SUCCESS) {
printf("cuMemMap error %d\n",result);
return cudaErrorMemoryAllocation;
}
CUmemAccessDesc accessDescriptor;
accessDescriptor.location.id = prop.location.id;
accessDescriptor.location.type = prop.location.type;
accessDescriptor.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
// Set the access flags for each location specified in desc for the given virtual address range.
// Any new mapping to this virtual address will need to have access granted through cuMemSetAccess, as all mappings start with no accessibility setup.
result = cuMemSetAccess(dptr, size, &accessDescriptor, 1);
if (result != CUDA_SUCCESS) {
printf("cuMemSetAccess error %d\n",result);
return cudaErrorMemoryAllocation;
}
CUmemorytype type;
CUresult err = cuPointerGetAttribute(&type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,dptr);
if(err != CUDA_SUCCESS) {
printf("cuPointerGetAttribute error %d\n",err);
}
else {
if(type == CU_MEMORYTYPE_DEVICE) {
printf("pointer(%p) addresses device memory\n",dptr);
}
}
uint flags = 1;
err = cuPointerSetAttribute(&flags, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,dptr);
if(err != CUDA_SUCCESS) {
printf("cuPointerSetAttribute error %d\n",err);
}
return cudaSuccess;
}
The program output is shown below; error 801 is CUDA_ERROR_NOT_SUPPORTED.