Proper use of cudaGLSetGLDevice and cudaGraphicsGLRegisterBuffer for OpenGL interop.

Calling cudaGraphicsGLRegisterBuffer() with an OpenGL context created through GLFW causes a segfault:

Program received signal SIGSEGV, Segmentation fault.
0x0000000000000000 in ?? ()
(cuda-gdb) bt
#0  0x0000000000000000 in ?? ()
#1  0x0000000000419c9b in GLInstance::initialize(std::shared_ptr<SoftmaxSettings>&, std::shared_ptr<SoftmaxData>&) ()
#2  0x00000000004154dc in main ()
(cuda-gdb)

The instruction at the frame #1 address (0x419c9b) is loading the __glewBindBuffer function pointer:

0x00419c9b      488b05268b2f.  mov rax, qword [obj.__glewBindBuffer]

After reading some examples, it looked like I should explicitly set the device via cudaGLSetGLDevice() after creating the OpenGL window, but that call returns the error:

GPUassert: cannot set while device is active in this process src/GLInstance.cu 27

What is the proper sequence here, or am I missing a step? Let me know if I can provide any more information. There are two devices of compute capability 3.0 available, though I am only attempting to use one at the moment. Here is the full code snippet:

void GLInstance::initialize(std::shared_ptr<SoftmaxSettings>& aSoftmaxSettings,
	     			        std::shared_ptr<SoftmaxData>& aSoftmaxData)
{
	mDrawSize = aSoftmaxData->hostPoints.size();

	if (!glfwInit()) 
	{
		fprintf(stderr, "ERROR: could not start GLFW3\n");
	}

	mWindow = glfwCreateWindow(aSoftmaxSettings->windowWidth, aSoftmaxSettings->windowHeight, "Cuda Regression", NULL, NULL);
	glfwSetWindowPos(mWindow, 1920 - 2*aSoftmaxSettings->windowWidth - 2, 0);
	if (!mWindow) 
	{
		fprintf(stderr, "ERROR: could not openmWindow with GLFW3\n");
		glfwTerminate();
	}

	glfwSetInputMode(mWindow, GLFW_STICKY_KEYS, GL_TRUE);
	glfwSetInputMode(mWindow, GLFW_CURSOR, GLFW_CURSOR_DISABLED);
	glfwPollEvents();
	glfwSetCursorPos(mWindow, aSoftmaxSettings->windowWidth/2, aSoftmaxSettings->windowHeight/2);
	glfwMakeContextCurrent(mWindow);
	glDepthFunc(GL_LESS);

	//gpuErrchk(cudaSetDevice(0));
	gpuErrchk(cudaGLSetGLDevice(0));

		// mPosition Buffer Array

	mPointsVBO = 0;
	glGenBuffers(1, &mPointsVBO);
	glBindBuffer(GL_ARRAY_BUFFER, mPointsVBO);
	glBufferData(GL_ARRAY_BUFFER, aSoftmaxData->hostPoints.size()*sizeof(float3), aSoftmaxData->hostPoints.data(), GL_STATIC_DRAW);
	gpuErrchk(cudaGraphicsGLRegisterBuffer(&mCudaPointsResource, mPointsVBO, cudaGraphicsMapFlagsReadOnly));

		// Color Buffer Array

	mColorsVBO = 0;
	glGenBuffers(1, &mColorsVBO);
	glBindBuffer(GL_ARRAY_BUFFER, mColorsVBO);
	glBufferData(GL_ARRAY_BUFFER, aSoftmaxData->hostColors.size()*sizeof(float3), aSoftmaxData->hostColors.data(), GL_DYNAMIC_DRAW);
	gpuErrchk(cudaGraphicsGLRegisterBuffer(&mCudaColorResource, mColorsVBO, cudaGraphicsMapFlagsNone));