cuMemAddressReserve returns an unexpected address

Hi,
I’m currently experimenting with the low-level virtual address APIs.
During testing, I got some unexpected results from a basic cuMemAddressReserve call.

Simply,
I allocated device memory with cuMemAlloc and then freed it with cuMemFree.
After that, I tried to reserve the same virtual address that cuMemAlloc had returned, using the cuMemAddressReserve API.

But what I got back was a different virtual address…

I have attached the full source code and the results.
I’m running it under CUDA 12.1 on an RTX 3090.

Did I do something wrong, or is this result correct?

int main()
{

CUcontext ctx;
CUdevice dev;
CUresult status; 

cuInit(0);
cuDevicePrimaryCtxRetain(&ctx,0);
cuCtxSetCurrent(ctx);
cuCtxGetDevice(&dev);

CUdeviceptr reference;
size_t size = sizeof(float);

status = cuMemAlloc(&reference, size);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n", "Address", reference);

status = cuMemFree(reference);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n","Address after free", reference);

CUdeviceptr new_ptr;

CUmemAllocationProp prop;
CUmemAccessDesc accessDesc;
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = (int)dev;
prop.win32HandleMetaData = NULL;

accessDesc.location = prop.location;
accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;

size_t chunk_sz;
status = cuMemGetAllocationGranularity(&chunk_sz, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);

const size_t aligned_sz = ((size + chunk_sz - 1) / chunk_sz) * chunk_sz;
status = cuMemAddressReserve(&new_ptr, aligned_sz, 0, reference, 0);
assert(status == CUDA_SUCCESS);
fprintf(stderr,"%50s: %p\n","new_ptr", new_ptr);

}