Hello, @Cory.Perry
I’m working with the cuMemCreate family of functions and ran into a strange situation that may be a bug.
When I try to allocate device memory with cuMemCreate, it only works when a std::vector object is defined in the program.
I will attach the full code.
– main –
#include <iostream>
#include <iomanip>
#include <chrono>
#include <thread>
#include <assert.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
/**
 * Reports a failed CUDA driver API call on stderr.
 *
 * Does nothing when `res` is CUDA_SUCCESS. Otherwise prints
 * "<file>:<line> <tok> failed (<code>): <description>".
 * The function only reports; it does not abort or throw, so the caller
 * decides how to react to the failure.
 *
 * @param res   Result code returned by the driver call.
 * @param tok   Source text of the call, used in the message.
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 */
static inline void
checkDrvError(CUresult res, const char *tok, const char *file, unsigned line)
{
    if (res == CUDA_SUCCESS) {
        return;
    }

    const char *errStr = NULL;
    // cuGetErrorString can fail and leave errStr NULL (e.g. for an
    // unrecognized code); streaming a null char pointer is undefined
    // behaviour, so substitute a placeholder.
    if (cuGetErrorString(res, &errStr) != CUDA_SUCCESS || errStr == NULL) {
        errStr = "unknown error";
    }

    std::cerr << file << ':' << line << ' ' << tok
              << " failed (" << (unsigned)res << "): " << errStr << std::endl;
}
// Evaluates a driver API call and reports a failure through checkDrvError,
// tagging the message with the call text and the call site.
// Wrapped in do/while(0) with no trailing semicolon so that `CHECK_DRV(x);`
// is exactly one statement and is safe inside unbraced if/else.
#define CHECK_DRV(x) \
    do { checkDrvError((x), #x, __FILE__, __LINE__); } while (0)
int main()
{
std::vector<int> test;
size_t free;
typedef unsigned char ElemType;
CUcontext ctx;
CUdevice dev;
int supportsVMM = 0;
CHECK_DRV(cuInit(0));
CHECK_DRV(cuDevicePrimaryCtxRetain(&ctx, 0));
CHECK_DRV(cuCtxSetCurrent(ctx));
CHECK_DRV(cuCtxGetDevice(&dev));
CHECK_DRV(cuDeviceGetAttribute(&supportsVMM, CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED, dev));
fprintf(stderr, "SupportsVMM: %d\n", supportsVMM);
CUresult status = CUDA_SUCCESS;
cudaError_t error = cudaSuccess;
CUmemAllocationProp prop;
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = (int)dev;
prop.win32HandleMetaData = NULL;
CUmemAccessDesc accessDesc;
accessDesc.location = prop.location;
accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
size_t chunk_sz;
cuMemGetAllocationGranularity(&chunk_sz, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
assert(status == CUDA_SUCCESS);
size_t size = 4*1024;
const size_t aligned_size = ((size + chunk_sz -1)/chunk_sz) * chunk_sz;
CUmemGenericAllocationHandle handle;
CUdeviceptr new_ptr = 0ULL;
status = cuMemAddressReserve(&new_ptr, (aligned_size), 0ULL, 0ULL, 0ULL);
assert(status == CUDA_SUCCESS);
status = cuMemCreate(&handle, aligned_size, &prop, 0);
assert(status == CUDA_SUCCESS);
status = cuMemMap(new_ptr, aligned_size, 0, handle, 0);
assert(status == CUDA_SUCCESS);
status = cuMemSetAccess(new_ptr, aligned_size, &accessDesc, 1ULL);
assert(status == CUDA_SUCCESS);
float * dev_ptr = (float *)new_ptr;
error = cudaMemset(dev_ptr, 1, aligned_size);
assert(error == cudaSuccess);
}
– makefile –
# Builds the VMM example. Override the compiler with `make NVCC=/path/to/nvcc`.
NVCC ?= nvcc

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: vmm_main

# -lcuda links the driver API (cuMem*, cuCtx*); the runtime library is
# linked by nvcc itself. Recipe lines must start with a TAB character.
vmm_main: vmm_main.cpp
	$(NVCC) $^ -o $@ -lcuda -std=c++11

clean:
	$(RM) vmm_main
My system is CUDA 12.1 with an RTX 3090 on Ubuntu 18.04.
If I just comment out the first line of main, "std::vector<int> test;",
the program ends right after the cuMemCreate call with return value 1.
Did I do something wrong, or is it a bug?
Thanks