Order of cuInit, cuGLInit, cuGLCtxCreate CUDA_ERROR_INVALID_CONTEXT

What is the correct order of the initialization calls for cuda/opengl?

I have
GLX context creation, make current
cuInit(0)
cuGLInit()
cuGLCtxCreate(…)

cuGLInit yields CUDA_ERROR_INVALID_CONTEXT.

I can’t seem to find examples anywhere.

If I try:

cuInit
cuCtxCreate
cuGLInit

I get a seg fault:

#0 0x00007fac6c3bd369 in glGpuSyncGetHandleSizeNVX () from /usr/lib/libGL.so.1
#1 0x00007fac68f2518c in ?? () from /usr/lib/libcuda.so.1
#2 0x00007fac68f24a65 in ?? () from /usr/lib/libcuda.so.1
#3 0x000000000047c057 in pt::CudaTask::openGlCudaInit (this=0x1f7e440)
at CudaTask.cpp:59

edit: Additional information:
185.12 with Cuda beta 2.2
Opengl 3.0 glXCreateContextAttribsARB
I am using cuda device (cu*) calls, not cuda*.

I made a test program that outputs:

cuGLInit error: 201

Anything wrong with it? I have a GTX 8800 and GT 8600.

#include <iostream>

#include <X11/Xlib.h>

#include <assert.h>

#include <GL/gl.h>

#include <GL/glext.h>

#include <GL/glx.h>

#include <GL/glxext.h>

#include <cuda.h>

#include <cudaGL.h>

// compiled with:  g++ -I/usr/local/cuda/include -lX11 -lGL -lcuda -o testcudagl testcudagl.cpp 

int main(int argc, char * argv[])

{

	GLXContext _ctx, _share = 0;

	Display * display;

	display = XOpenDisplay(NULL);

	assert(display);

	int arr[] = { GLX_DRAWABLE_TYPE, GLX_PBUFFER_BIT | GLX_WINDOW_BIT , None };

	int n;

	GLXFBConfig * c = glXChooseFBConfig(display,

						DefaultScreen(display),

						arr, &n);

	assert(n);

	_ctx = glXCreateNewContext (display, c[0], GLX_RGBA_TYPE, 0, True);

	assert(_ctx);

	GLXPbuffer _pbuffer = glXCreatePbuffer(display, c[0], NULL);

	assert(_pbuffer);

		assert(True == glXMakeContextCurrent(display, _pbuffer, _pbuffer, _ctx));

	assert(CUDA_SUCCESS == cuInit(0));

	CUresult rc = cuGLInit();

	if (rc != CUDA_SUCCESS)

	{

		std::cerr << "cuGLInit error:  " << rc << std::endl;

	}

	return 0;

}

edit:
Found the problem: I had threading issues of my own.
The order is this:
make GL context, set current
cuInit
cuCtxCreate
cuGLInit
cuGLCtxCreate