I'm using CUDA release 11.0 (V11.0.221) with driver 450.80.02,
and nvvp reports the following error:
==9811== Error: Internal profiling error 4142:999.
======== Error: CUDA profiling error.
#include <cuda.h>
#include <cassert>
#include <cstdlib>
#include <iostream>
// ---- File-scope state shared by newAddr() and main() ----

// Driver context and device handle; both are filled in by main().
CUcontext ctx;
CUdevice dev;

// Result-code slot; nothing in this listing reads or updates it.
CUresult status{CUDA_SUCCESS};

// Allocation properties and access descriptor; main() populates their fields,
// newAddr() passes them to cuMemCreate / cuMemSetAccess.
CUmemAllocationProp prop;
CUmemAccessDesc accessDesc;

// Requested size in bytes; newAddr() rounds it up to a multiple of chunk_sz.
size_t new_sz{1024};
// Granularity reported by cuMemGetAllocationGranularity in main().
size_t chunk_sz;
// Rounded-up size, written by newAddr() and reused by main() for the copy.
size_t aligned_sz;
/**
 * Terminates the program with a diagnostic when a CUDA driver API call fails.
 *
 * The check stays active when NDEBUG is defined (a bare assert() would be
 * compiled out), and the failing expression, its call site and the driver's
 * name for the error code are written to stderr before aborting.
 *
 * @param res  result code returned by the driver call
 * @param tok  source text of the checked expression
 * @param file source file of the call site
 * @param line source line of the call site
 */
static inline void checkDrvError(CUresult res, const char *tok,
                                 const char *file, unsigned line) {
  if (res == CUDA_SUCCESS) {
    return;
  }

  const char *name = NULL;
  // Fall back to a placeholder if the driver cannot name the code.
  if (cuGetErrorName(res, &name) != CUDA_SUCCESS || name == NULL) {
    name = "<unknown CUresult>";
  }

  std::cerr << file << ':' << line << ": " << tok << " failed: " << name
            << " (" << static_cast<int>(res) << ")" << std::endl;
  std::abort();
}
// Wraps a driver call so the expression text and call site reach checkDrvError.
// Note: the expansion already ends in ';'.
#define CHECK_DRV(x) checkDrvError(x, #x, __FILE__, __LINE__);
/**
 * Reserves a virtual address range, creates a physical allocation of the same
 * size, maps the allocation into the range and applies the access settings in
 * the global `accessDesc`.
 *
 * Reads the globals `new_sz`, `chunk_sz`, `prop` and `accessDesc`, and stores
 * the rounded-up size in the global `aligned_sz`. `chunk_sz` must already hold
 * a non-zero granularity.
 *
 * @return device pointer to the start of the mapped range
 */
CUdeviceptr newAddr() {
  CUdeviceptr new_ptr = 0ULL;
  CUmemGenericAllocationHandle handle;

  // The rounding below divides by chunk_sz.
  assert(chunk_sz != 0);
  aligned_sz = ((new_sz + chunk_sz - 1) / chunk_sz) * chunk_sz;

  CHECK_DRV(cuMemAddressReserve(&new_ptr, aligned_sz, 0ULL, 0ULL, 0ULL));
  CHECK_DRV(cuMemCreate(&handle, aligned_sz, &prop, 0));
  CHECK_DRV(cuMemMap(new_ptr, aligned_sz, 0ULL, handle, 0ULL));
  // The handle is not returned to the caller, so drop our reference here;
  // the mapping keeps the physical memory alive until it is unmapped.
  CHECK_DRV(cuMemRelease(handle));
  CHECK_DRV(cuMemSetAccess(new_ptr, aligned_sz, &accessDesc, 1ULL));
  return new_ptr;
}
// Repeats, token for token, the CHECK_DRV definition that appears earlier in this listing.
#define CHECK_DRV(x) checkDrvError(x, #x, __FILE__, __LINE__);
int main() {
CHECK_DRV(cuInit(0));
CHECK_DRV(cuDevicePrimaryCtxRetain(&ctx, 0));
CHECK_DRV(cuCtxSetCurrent(ctx));
CHECK_DRV(cuCtxGetDevice(&dev));
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = 0;
prop.win32HandleMetaData = NULL;
accessDesc.location = prop.location;
accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
CHECK_DRV(cuMemGetAllocationGranularity(&chunk_sz, &prop,
CU_MEM_ALLOC_GRANULARITY_MINIMUM));
CUdeviceptr p1 = newAddr();
CUdeviceptr p2 = newAddr();
CHECK_DRV(cuMemcpy(p1, p2, aligned_sz));
return 0;
}
If I comment out
CHECK_DRV(cuMemcpy(p1, p2, aligned_sz));
everything is fine.