cudaGraphicsGLRegisterBuffer and unspecified driver error

Hi, I’m new to CUDA programming, so my question might be silly. Also, sorry for my English :)

I want to learn how to use CUDA buffers with OpenGL textures. I copied some code from the Mandelbrot example in the CUDA SDK. My code compiles without errors or warnings, but when I try to run it I get this output:

Creating GL texture...

Texture created.

Creating PBO...

src/main.cu(115) : cudaSafeCall() Runtime API error : unspecified driver error.

line 115 is

cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO,cudaGraphicsMapFlagsWriteDiscard));

I don’t think it’s a driver issue, because I can run the Mandelbrot example on my system (and this line, along with some others, is copied from the Mandelbrot example).

My whole application code:

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <cstdio>

#include <GL/glew.h>

#if defined(__APPLE__) || defined(MACOSX)

#include <GLUT/glut.h>

#else

#include <GL/glut.h>

#endif

#ifdef _WIN32

#include <GL/wglew.h>

#endif

#include <cuda_runtime_api.h>

#include <cutil_inline.h>

#include <cutil_gl_inline.h>

#include <cuda_gl_interop.h>

#include <rendercheck_gl.h>

// includes, kernels

#include "kernel.cu"

// Expresses a byte offset i as a char* counted from address 0. It is passed
// as the pixel-data argument of glTexSubImage2D in DrawGLScene.
// NOTE(review): presumably GL treats it as an offset into the bound pixel
// unpack buffer rather than as a client pointer — confirm.
#define BUFFER_DATA(i) ((char *)0 + i)

/* The number of our GLUT window (return value of glutCreateWindow in main) */

int window;

int size; //initial size of window (parsed from argv[1]; used for both width and height)

// Current image width in pixels; updated by the reshape callback.
int imageW;

// Current image height in pixels; updated by the reshape callback.
int imageH;

// Not referenced by any code visible in this listing.
unsigned char* h_data;

// Not referenced by any code visible in this listing.
unsigned char* d_data;

// FPS bookkeeping used by ComputeFPS: frames counted since the last update,
// the latest glutGet(GLUT_ELAPSED_TIME) reading, and the reading at the last
// update.
int frame = 0, mtime, timebase = 0;

// Most recently computed frames-per-second value, shown in the window title.
float fps;

//OpenGL PBO and texture "names"

GLuint gl_PBO, gl_Tex;

struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange

//Source image on the host side

uchar4 *h_Src = 0;

// Destination image on the GPU side (pointer obtained by mapping the PBO in ComputeBitmap)

uchar4 *d_dst = NULL;

/*
 * One-time fixed-function OpenGL state setup, called right after the window
 * has been created.
 *
 * Width, Height: window size in pixels; only their ratio is used, as the
 * aspect ratio of the perspective projection.
 *
 * On return the current matrix mode is GL_MODELVIEW.
 */
GLvoid InitGL(GLsizei Width, GLsizei Height)
{
    const GLfloat aspect = (GLfloat)Width / (GLfloat)Height;

    // Clear values: dark blue background (0, 0, 0.2), depth cleared to 1.0.
    glClearColor(0.0f, 0.0f, 0.2f, 0.0f);
    glClearDepth(1.0);

    // Texturing on, smooth (interpolated) colour shading.
    glEnable(GL_TEXTURE_2D);
    glShadeModel(GL_SMOOTH);

    // 45 degree perspective projection, near plane 0.1, far plane 100.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(45.0f, aspect, 0.1f, 100.0f);

    glMatrixMode(GL_MODELVIEW);
}

void initOpenGLBuffers(int w, int h)

{

    // delete old buffers

    if (h_Src) {

        free(h_Src);

        h_Src = 0;

    }

if (gl_Tex) {

        glDeleteTextures(1, &gl_Tex);

        gl_Tex = 0;

    }

    if (gl_PBO) {

		cudaGraphicsUnregisterResource(cuda_pbo_resource);

        glDeleteBuffers(1, &gl_PBO);

        gl_PBO = 0;

    }

// check for minimized window

    if ((w==0) && (h==0)) {

        return;

    }

// allocate new buffers

	h_Src = (uchar4*)malloc(w * h * 4);

printf("Creating GL texture...\n");

        glEnable(GL_TEXTURE_2D);

        glGenTextures(1, &gl_Tex);

        glBindTexture(GL_TEXTURE_2D, gl_Tex);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);

    printf("Texture created.\n");

printf("Creating PBO...\n");

        glGenBuffers(1, &gl_PBO);

        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);

        glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, w * h * 4, h_Src, GL_STREAM_COPY);

        //While a PBO is registered to CUDA, it can't be used

        //as the destination for OpenGL drawing calls.

        //But in our particular case OpenGL is only used

        //to display the content of the PBO, specified by CUDA kernels,

        //so we need to register/unregister it only once.

cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO,cudaGraphicsMapFlagsWriteDiscard));

    printf("PBO created.\n");

}

void InitData()

{

	cudaSetDevice( cutGetMaxGflopsDeviceId() );

	glewInit();

	initOpenGLBuffers(imageW, imageH);

}

/*
 * GLUT reshape callback: adapts the viewport and projection to the new
 * window size and rebuilds the image buffers to match.
 *
 * Width, Height: new window size in pixels; a height of 0 is treated as 1.
 */
GLvoid ReSizeGLScene(GLsizei Width, GLsizei Height)
{
    // A zero height is replaced by 1.
    const GLsizei safeHeight = (Height == 0) ? 1 : Height;

    // Remember the new image size for the kernel launch and texture upload.
    imageW = Width;
    imageH = safeHeight;

    glViewport(0, 0, Width, safeHeight);

    // Unit-square orthographic projection: the quad (0,0)-(1,1) fills the window.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    // Texture, PBO and CUDA registration all depend on the image size.
    initOpenGLBuffers(Width, safeHeight);
}

/*
 * Counts one frame and shows the current frames-per-second value in the
 * window title. The value itself is recomputed only when more than 1000 ms
 * have passed since the previous recomputation.
 */
void ComputeFPS()
{
    ++frame;
    mtime = glutGet(GLUT_ELAPSED_TIME);

    const int elapsedMs = mtime - timebase;
    if (elapsedMs > 1000) {
        fps = (float)(frame * 1000) / (float)elapsedMs;
        // Start a new measurement window.
        timebase = mtime;
        frame = 0;
    }

    char title[256];
    sprintf(title, "fps: %3.1f", fps);
    glutSetWindowTitle(title);
}

void ComputeBitmap()

{

	dim3  grid( ((imageW*imageH)/512)+1, 1, 1);

	dim3  threads( 512, 1, 1);

	cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));

	size_t num_bytes;

	cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&d_dst, &num_bytes, cuda_pbo_resource));

	test_buf<<<grid, threads>>>(d_dst, imageW*imageH);

	cutilCheckMsg("Kernel execution failed");

	cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

	ComputeFPS();

}

/*
 * GLUT display callback: lets CUDA produce a new image, copies it into the
 * texture and draws that texture on a unit quad.
 */
GLvoid DrawGLScene()
{
    // Texture coordinates and vertex positions coincide for the unit quad.
    static const GLfloat kCorners[4][2] = {
        { 0.0f, 0.0f },
        { 1.0f, 0.0f },
        { 1.0f, 1.0f },
        { 0.0f, 1.0f }
    };

    ComputeBitmap();

    glLoadIdentity();   // reset the current matrix before drawing

    glDisable(GL_DEPTH_TEST);
    glEnable(GL_TEXTURE_2D);

    // Refresh the texture; the data argument is offset 0 (BUFFER_DATA(0)).
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));

    // Draw the textured quad.
    glBegin(GL_QUADS);
    for (int corner = 0; corner < 4; ++corner) {
        glTexCoord2f(kCorners[corner][0], kCorners[corner][1]);
        glVertex2f(kCorners[corner][0], kCorners[corner][1]);
    }
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);

    // Double buffered: swap to show what was just drawn.
    glutSwapBuffers();

    ComputeFPS();
}

/*
 * Exit handler (registered with atexit in main): releases the texture, the
 * CUDA registration and the PBO, then shuts the CUDA runtime down.
 */
GLvoid OnClose()
{
    // The texture does not depend on the CUDA objects.
    glDeleteTextures(1, &gl_Tex);

    // Unregister from CUDA first, then delete the GL buffer it referred to.
    cudaGraphicsUnregisterResource(cuda_pbo_resource);
    glDeleteBuffers(1, &gl_PBO);

    cudaThreadExit();
}

/*
 * Prints the command-line synopsis to stdout and flushes it.
 *
 * arg: program name to show in the synopsis (callers pass argv[0]).
 */
void usage(char* arg)
{
    printf("USAGE:\n%s windowSize chargesCount maxCharge maxColoredCharge\n", arg);
    fflush(stdout);
}

/*
 * Program entry point.
 *
 * Expects four arguments (windowSize chargesCount maxCharge
 * maxColoredCharge); only windowSize is used at the moment. Creates a square
 * double-buffered GLUT window of that size, registers the callbacks,
 * initializes OpenGL and the CUDA/GL buffers, and enters the GLUT main loop.
 *
 * Returns EXIT_FAILURE when the arguments are missing or windowSize is not
 * a positive integer.
 */
int main(int argc, char** argv)
{
    /* GLUT consumes the command line arguments that pertain to it (or to
       X Windows) and removes them from argv. */
    glutInit(&argc, argv);

    if (argc != 5) {
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    // atoi() never throws and silently yields 0 for garbage, so the window
    // size is validated explicitly with strtol().
    char *end = NULL;
    const long requested = strtol(argv[1], &end, 10);
    const bool wellFormed = (end != argv[1]) && (*end == '\0');
    if (!wellFormed || requested <= 0 || requested != (long)(int)requested) {
        fprintf(stdout, "Bad parameter(s) format\n");
        fflush(stdout);
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    size = (int)requested;
    imageW = size;
    imageH = size;
    //chargesCount=atoi(argv[2]);
    //maxCharge=atoi(argv[3]);
    //maxColoredCharge=atoi(argv[4]);

    /* Double buffered RGBA window, size x size pixels, at the upper left
       corner of the screen. */
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
    glutInitWindowSize(size, size);
    glutInitWindowPosition(0, 0);
    window = glutCreateWindow("Project CUDA");

    /* Drawing callback; the same function is used as idle callback so the
       scene is redrawn even when there are no events. */
    glutDisplayFunc(&DrawGLScene);
    glutIdleFunc(&DrawGLScene);

    /* Called whenever the window is resized. */
    glutReshapeFunc(&ReSizeGLScene);

    /* Release GL/CUDA resources on exit. */
    atexit(&OnClose);

    /* Initialize OpenGL state and the CUDA/GL buffers. */
    InitGL(size, size);
    InitData();

    /* Start Event Processing Engine */
    glutMainLoop();

    return EXIT_SUCCESS;
}

and very simple kernel

/*
 * Test pattern kernel: writes one grey uchar4 pixel per thread.
 *
 * data: output pixels; element i is written by the thread with global index i.
 * size: number of elements in data; threads with an index >= size do nothing.
 *
 * Expects a 1D launch (only the x components of the block and thread indices
 * are used). The grey level is the thread's index within its block modulo
 * 256; the fourth component is always 0.
 */
__global__ void test_buf(uchar4* data, int size)
{
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the buffer have nothing to write.
    if (pixel >= size)
        return;

    const unsigned char shade = threadIdx.x % 256;
    data[pixel] = make_uchar4(shade, shade, shade, 0);
}

What am I missing or doing wrong?

You need to initialize CUDA before creating the GLUT window (you’ll notice the Mandelbrot sample does this). The reshape function gets called once on window creation.

What do you mean by “initialize CUDA”? Can you tell me which call I should use?

I rebuilt my code to be more like the Mandelbrot example:

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <cstdio>

#include <GL/glew.h>

#if defined(__APPLE__) || defined(MACOSX)

#include <GLUT/glut.h>

#else

#include <GL/glut.h>

#endif

#ifdef _WIN32

#include <GL/wglew.h>

#endif

#include <cuda_runtime_api.h>

#include <cutil_inline.h>

#include <cutil_gl_inline.h>

#include <cuda_gl_interop.h>

#include <rendercheck_gl.h>

// includes, kernels

#include "kernel.cu"

// Expresses a byte offset i as a char* counted from address 0. It is passed
// as the pixel-data argument of glTexSubImage2D in DrawGLScene.
// NOTE(review): presumably GL treats it as an offset into the bound pixel
// unpack buffer rather than as a client pointer — confirm.
#define BUFFER_DATA(i) ((char *)0 + i)

/* The number of our GLUT window */
// NOTE(review): not assigned anywhere in the code shown here; initGL
// discards the return value of glutCreateWindow.

int window;

int size; //initial size of window (parsed from argv[1]; used for both width and height)

// Current image width in pixels; updated by the reshape callback.
int imageW;

// Current image height in pixels; updated by the reshape callback.
int imageH;

// Not referenced by any code visible in this listing.
unsigned char* h_data;

// Not referenced by any code visible in this listing.
unsigned char* d_data;

// FPS bookkeeping used by ComputeFPS: frames counted since the last update,
// the latest glutGet(GLUT_ELAPSED_TIME) reading, and the reading at the last
// update.
int frame = 0, mtime, timebase = 0;

// Most recently computed frames-per-second value, shown in the window title.
float fps;

//OpenGL PBO and texture "names"

GLuint gl_PBO, gl_Tex;

struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange

//Source image on the host side

uchar4 *h_Src = 0;

// Destination image on the GPU side (pointer obtained by mapping the PBO in ComputeBitmap)

uchar4 *d_dst = NULL;

void initOpenGLBuffers(int w, int h)

{

    // delete old buffers

    if (h_Src) {

        free(h_Src);

        h_Src = 0;

    }

if (gl_Tex) {

        glDeleteTextures(1, &gl_Tex);

        gl_Tex = 0;

    }

    if (gl_PBO) {

		cudaGraphicsUnregisterResource(cuda_pbo_resource);

        glDeleteBuffers(1, &gl_PBO);

        gl_PBO = 0;

    }

// check for minimized window

    if ((w==0) && (h==0)) {

        return;

    }

// allocate new buffers

	h_Src = (uchar4*)malloc(w * h * 4);

printf("Creating GL texture...\n");

        glEnable(GL_TEXTURE_2D);

        glGenTextures(1, &gl_Tex);

        glBindTexture(GL_TEXTURE_2D, gl_Tex);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);

    printf("Texture created.\n");

printf("Creating PBO...\n");

        glGenBuffers(1, &gl_PBO);

        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);

        glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, w * h * 4, h_Src, GL_STREAM_COPY);

        //While a PBO is registered to CUDA, it can't be used

        //as the destination for OpenGL drawing calls.

        //But in our particular case OpenGL is only used

        //to display the content of the PBO, specified by CUDA kernels,

        //so we need to register/unregister it only once.

cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard));

    printf("PBO created.\n");

}

// Placeholder for application data setup: currently empty. main() still
// calls it once, before creating the GL/CUDA buffers.
void InitData()

{

}

/*
 * Counts one frame and shows the current frames-per-second value in the
 * window title. The value itself is recomputed only when more than 1000 ms
 * have passed since the previous recomputation.
 */
void ComputeFPS()
{
    ++frame;
    mtime = glutGet(GLUT_ELAPSED_TIME);

    const int elapsedMs = mtime - timebase;
    if (elapsedMs > 1000) {
        fps = (float)(frame * 1000) / (float)elapsedMs;
        // Start a new measurement window.
        timebase = mtime;
        frame = 0;
    }

    char title[256];
    sprintf(title, "fps: %3.1f", fps);
    glutSetWindowTitle(title);
}

void ComputeBitmap()

{

	dim3  grid( ((imageW*imageH)/512)+1, 1, 1);

	dim3  threads( 512, 1, 1);

	cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));

	size_t num_bytes;

	cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&d_dst, &num_bytes, cuda_pbo_resource));

	test_buf<<<grid, threads>>>(d_dst, imageW*imageH);

	cutilCheckMsg("Kernel execution failed");

	cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

	ComputeFPS();

}

/*
 * GLUT display callback: lets CUDA produce a new image, copies it into the
 * texture and draws that texture on a unit quad.
 */
GLvoid DrawGLScene()
{
    // Texture coordinates and vertex positions coincide for the unit quad.
    static const GLfloat kCorners[4][2] = {
        { 0.0f, 0.0f },
        { 1.0f, 0.0f },
        { 1.0f, 1.0f },
        { 0.0f, 1.0f }
    };

    ComputeBitmap();

    glLoadIdentity();   // reset the current matrix before drawing

    glDisable(GL_DEPTH_TEST);
    glEnable(GL_TEXTURE_2D);

    // Refresh the texture; the data argument is offset 0 (BUFFER_DATA(0)).
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));

    // Draw the textured quad.
    glBegin(GL_QUADS);
    for (int corner = 0; corner < 4; ++corner) {
        glTexCoord2f(kCorners[corner][0], kCorners[corner][1]);
        glVertex2f(kCorners[corner][0], kCorners[corner][1]);
    }
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);

    // Double buffered: swap to show what was just drawn.
    glutSwapBuffers();

    ComputeFPS();
}

/*
 * Exit handler (registered with atexit in main): releases the texture, the
 * CUDA registration and the PBO, then shuts the CUDA runtime down.
 */
GLvoid OnClose()
{
    // The texture does not depend on the CUDA objects.
    glDeleteTextures(1, &gl_Tex);

    // Unregister from CUDA first, then delete the GL buffer it referred to.
    cudaGraphicsUnregisterResource(cuda_pbo_resource);
    glDeleteBuffers(1, &gl_PBO);

    cudaThreadExit();
}

/*
 * Prints the command-line synopsis to stdout and flushes it.
 *
 * arg: program name to show in the synopsis (callers pass argv[0]).
 */
void usage(char* arg)
{
    printf("USAGE:\n%s windowSize chargesCount maxCharge maxColoredCharge\n", arg);
    fflush(stdout);
}

void initGL(int *argc, char **argv)

{

    printf("Initializing GLUT...\n");

        glutInit(argc, argv);

        glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);

        glutInitWindowSize(imageW, imageH);

        glutInitWindowPosition(0, 0);

        glutCreateWindow(argv[0]);

printf("Loading extensions: %s\n", glewGetErrorString(glewInit()));

	    if (!glewIsSupported( "GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object" )) {

		    fprintf(stderr, "Error: failed to get minimal extensions for demo\n");

		    fprintf(stderr, "This sample requires:\n");

		    fprintf(stderr, "  OpenGL version 1.5\n");

		    fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");

		    fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");

		    cutilExit(*argc, argv);

	    }

	printf("OpenGL window created.\n");

}

// GLUT idle callback (registered in main): requests a redisplay whenever
// GLUT is idle, so the scene is redrawn continuously.
void idleFunc()

{

	glutPostRedisplay();

}

/*
 * GLUT reshape callback: adapts the viewport and projection to the new
 * window size, records the size and rebuilds the image buffers to match.
 *
 * w, h: new window size in pixels.
 *
 * On return the current matrix mode is GL_MODELVIEW. This matters because
 * DrawGLScene calls glLoadIdentity() every frame: if GL_PROJECTION were
 * still current, that call would erase the orthographic projection set up
 * here and the quad would no longer fill the window.
 */
void reshapeFunc(int w, int h)
{
    glViewport(0, 0, w, h);

    // Unit-square orthographic projection: the quad (0,0)-(1,1) fills the window.
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

    // Finish with MODELVIEW selected (see above).
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    imageW = w;
    imageH = h;

    // Texture, PBO and CUDA registration all depend on the image size.
    initOpenGLBuffers(w, h);
}

int main(int argc, char** argv)

{

	if(argc!=5)

	{

		usage(argv[0]);

		return EXIT_FAILURE;

	}

	try

	{

		size=atoi(argv[1]);

		imageW=size;

		imageH=size;

		//chargesCount=atoi(argv[2]);

		//maxCharge=atoi(argv[3]);

		//maxColoredCharge=atoi(argv[4]);

	}

	catch(...)

	{

		fprintf(stdout, "Bad parameter(s) format\n");

		fflush(stdout);

		usage(argv[0]);

		return EXIT_FAILURE;

	}

	initGL(&argc,argv);

	cudaSetDevice( cutGetMaxGflopsDeviceId() );

	InitData();

	initOpenGLBuffers(imageW,imageH);

	glutDisplayFunc(DrawGLScene);

	glutIdleFunc(idleFunc);

	glutReshapeFunc(reshapeFunc);

	atexit(OnClose);

	glutMainLoop();

}

and again I have this output:

Initializing GLUT...

Loading extensions: No error

OpenGL window created.

Creating GL texture...

Texture created.

Creating PBO...

src/main.cu(95) : cudaSafeCall() Runtime API error : unspecified driver error.

line 95 is (as earlier)

cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard));

I don’t know which step I am missing…

Use the cudaGLSetGLDevice() function to initialize it :)

You need both a CUDA context and an OpenGL context (and make sure you have an active OpenGL context prior to creating your CUDA context). In the Runtime API, it’ll (usually) create a CUDA context implicitly for you the first time you use a CUDA function that requires a context. However if you’re looking to use CUDA<–>OpenGL interoperability, you can use cudaGLSetGLDevice() as maninara stated, or you can do it even more explicitly with cuGLCtxCreate if you’re down with the Driver API.