cuIpcOpenMemHandle returns CUDA_ERROR_INVALID_CONTEXT

#include <cuda.h>
#include <opencv2/opencv.hpp>
#include <iostream>

/*
 * Elog - print an error message to stderr, prefixed with the calling
 * function name and source line.  Wrapped in do { } while(0) so the
 * macro expands to a single statement (safe inside un-braced if/else).
 */
#define Elog(fmt,...)                                       \
	do {                                                    \
		fprintf(stderr, "%s:%d: " fmt "\n",                 \
				__FUNCTION__, __LINE__, ##__VA_ARGS__);     \
	} while(0)

/*
 * cudaErrorName - map a CUresult code to its printable error-name string.
 * Returns "unknown error" when cuGetErrorName itself fails (for example,
 * when passed an unrecognized error code).
 */
static const char *
cudaErrorName(CUresult rc)
{
	const char *name = NULL;

	return (cuGetErrorName(rc, &name) == CUDA_SUCCESS) ? name : "unknown error";
}

using namespace cv;
using namespace std;

int main()
{
	Mat img(1920, 1080, CV_8UC3);
	randu(img, Scalar(0, 0, 0), Scalar(255, 255, 255));
    imwrite("publish.jpg", img);
    
	void *ptr = NULL;
    CUresult rc;
    CUdevice cuda_device;
    CUcontext cuda_context;
    CUdeviceptr cuda_devptr;
    CUipcMemHandle ipc_memhandle;

    rc = cuInit(0);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuInit: %s", cudaErrorName(rc));
    rc = cuDeviceGet(&cuda_device, 0);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuDeviceGet: %s", cudaErrorName(rc));
    rc = cuCtxCreate(&cuda_context, 0, cuda_device);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuCxtCreate: %s", cudaErrorName(rc));
    rc = cuCtxEnablePeerAccess(cuda_context, 0);


    size_t count = img.total() * img.elemSize();
    cout << count << endl;
    rc = cuMemAlloc(&cuda_devptr, count);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuMemAlloc: %s", cudaErrorName(rc));
    rc= cuIpcGetMemHandle(&ipc_memhandle, cuda_devptr);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuMemGetMemHandle: %s", cudaErrorName(rc));

    rc = cuIpcOpenMemHandle(&cuda_devptr, ipc_memhandle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
    if (rc != CUDA_SUCCESS)	Elog("failed on cuIpcOpenMemHandle: %s", cudaErrorName(rc));
    getchar();
    return 0;
}

The CUDA runtime API has the same problem. I used cudaGetDeviceProperties to check unifiedAddressing and computeMode, and both turned out to be fine. So am I doing something wrong?

IPC is intended for interprocess communication. The mem handle is designed to be opened by another process. Why are you trying to open it in the process you created it in?

If you want to access the allocation from that original process, just use the cuda_devptr you have already allocated via cuMemAlloc. Even if the runtime API let you overwrite the pointer, you would have a memory leak.

It’s not clear you understand the general usage pattern here. Please study the cuda simpleIPC sample code.

Oh — the background is that I use CUDA IPC in a ROS2 node, and cudaIpcOpenMemHandle there returned “invalid device context”. I suspected my usage was wrong, so I tried opening the handle in the same process as a sanity check.