Hi,
I’m currently playing with the low level virtual address APIs.
During testing, I got some unexpected results from the basic cuMemAddressReserve API.
In short, I allocated device memory with a plain cuMemAlloc and then freed it with cuMemFree.
After that, I tried to reserve the same virtual address that cuMemAlloc had returned, using the cuMemAddressReserve API.
But what I got back was a different virtual address…
I attach full source code and results.
I’m running it under CUDA 12.1 with RTX 3090.
Did I do something wrong, or is this result expected?
int main()
{
CUcontext ctx;
CUdevice dev;
CUresult status;
cuInit(0);
cuDevicePrimaryCtxRetain(&ctx,0);
cuCtxSetCurrent(ctx);
cuCtxGetDevice(&dev);
CUdeviceptr reference;
size_t size = sizeof(float);
status = cuMemAlloc(&reference, size);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n", "Address", reference);
status = cuMemFree(reference);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n","Address after free", reference);
CUdeviceptr new_ptr;
CUmemAllocationProp prop;
CUmemAccessDesc accessDesc;
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = (int)dev;
prop.win32HandleMetaData = NULL;
accessDesc.location = prop.location;
accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
size_t chunk_sz;
status = cuMemGetAllocationGranularity(&chunk_sz, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
const size_t aligned_sz = ((size + chunk_sz - 1) / chunk_sz) * chunk_sz;
status = cuMemAddressReserve(&new_ptr, aligned_sz, 0, reference, 0);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n","new_ptr", new_ptr);
}