Error: Internal profiling error 4142:999

I’m using CUDA release 11.0 (V11.0.221) with driver 450.80.02, and I get the following nvvp error:

==9811== Error: Internal profiling error 4142:999.
======== Error: CUDA profiling error.
#include <cuda.h>

#include <cassert>
#include <cstdlib>
#include <iostream>

// Driver context and device handle; both are filled in by main().
CUcontext ctx;
CUdevice dev;

// Result holder; not referenced by the rest of the code shown here.
CUresult status = CUDA_SUCCESS;

// Allocation properties and the matching access descriptor. main() populates
// them and newAddr() passes them to cuMemCreate / cuMemSetAccess.
CUmemAllocationProp prop;
CUmemAccessDesc accessDesc;

// Requested allocation size.
size_t new_sz = 1024;
// Allocation granularity, written by cuMemGetAllocationGranularity in main().
size_t chunk_sz;
// new_sz rounded up to a multiple of chunk_sz; written by newAddr().
size_t aligned_sz;

// Verifies the result of a CUDA driver API call.
//
// res  - status code returned by the driver call.
// tok  - source text of the checked expression (stringified by CHECK_DRV).
// file - source file containing the call.
// line - line number of the call.
//
// Returns normally when res is CUDA_SUCCESS. Otherwise prints the failing
// expression, its location and the driver's error name/description to
// stderr and aborts. Unlike a bare assert(), the check is still performed
// when the program is built with NDEBUG.
static inline void checkDrvError(CUresult res, const char *tok,
                                 const char *file, unsigned line) {
  if (res == CUDA_SUCCESS) {
    return;
  }

  // Both lookups can fail themselves (e.g. for an unrecognised code), so fall
  // back to placeholders rather than streaming a null pointer.
  const char *errName = 0;
  const char *errDesc = 0;
  if (cuGetErrorName(res, &errName) != CUDA_SUCCESS || errName == 0) {
    errName = "<unknown error>";
  }
  if (cuGetErrorString(res, &errDesc) != CUDA_SUCCESS || errDesc == 0) {
    errDesc = "<no description>";
  }

  std::cerr << file << ':' << line << ": " << tok << " failed: " << errName
            << " (" << static_cast<int>(res) << "): " << errDesc << std::endl;
  std::abort();
}

// Evaluates the driver API expression x and hands its result to
// checkDrvError together with the stringified expression, the current file
// and the current line.
// NOTE(review): the expansion already ends in ';', so `CHECK_DRV(x);` yields
// an extra empty statement. Harmless in the straight-line code here, but it
// would break an unbraced if/else around the macro.
#define CHECK_DRV(x) checkDrvError(x, #x, __FILE__, __LINE__);

// Reserves a virtual address range, backs it with a newly created physical
// allocation and enables access to it.
//
// Reads the globals new_sz, chunk_sz, prop and accessDesc, and writes
// aligned_sz (new_sz rounded up to a multiple of chunk_sz). chunk_sz must be
// non-zero before the call because it is used as a divisor.
//
// Returns the device pointer of the mapped range, which spans aligned_sz.
// Every driver call is checked through CHECK_DRV.
CUdeviceptr newAddr() {
  CUdeviceptr new_ptr = 0ULL;
  CUmemGenericAllocationHandle handle;

  // Round the requested size up to the allocation granularity.
  aligned_sz = ((new_sz + chunk_sz - 1) / chunk_sz) * chunk_sz;
  CHECK_DRV(cuMemAddressReserve(&new_ptr, aligned_sz, 0ULL, 0ULL, 0ULL));
  CHECK_DRV(cuMemCreate(&handle, aligned_sz, &prop, 0));
  CHECK_DRV(cuMemMap(new_ptr, aligned_sz, 0ULL, handle, 0ULL));
  CHECK_DRV(cuMemSetAccess(new_ptr, aligned_sz, &accessDesc, 1ULL));

  // The handle is not returned to the caller, so it must be released here or
  // it can never be released. The mapping keeps the physical memory alive
  // until the range is unmapped.
  CHECK_DRV(cuMemRelease(handle));

  return new_ptr;
}

// (Redundant second definition of CHECK_DRV removed; the macro is already
// defined ahead of newAddr().)

int main() {
  CHECK_DRV(cuInit(0));
  CHECK_DRV(cuDevicePrimaryCtxRetain(&ctx, 0));
  CHECK_DRV(cuCtxSetCurrent(ctx));
  CHECK_DRV(cuCtxGetDevice(&dev));

  prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
  prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  prop.location.id = 0;
  prop.win32HandleMetaData = NULL;

  accessDesc.location = prop.location;
  accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;

  CHECK_DRV(cuMemGetAllocationGranularity(&chunk_sz, &prop,
                                          CU_MEM_ALLOC_GRANULARITY_MINIMUM));

  CUdeviceptr p1 = newAddr();
  CUdeviceptr p2 = newAddr();

  CHECK_DRV(cuMemcpy(p1, p2, aligned_sz));

  return 0;
}

If I comment out

CHECK_DRV(cuMemcpy(p1, p2, aligned_sz));

everything is fine.