Implementing graphics interoperability in molecular dynamics code

I’m having a little trouble implementing OpenGL and CUDA interoperability in an event-driven molecular dynamics code that I am working on. The kernel below is called from the OpenGL main loop. The idea is to read the data in the device memory in order to show the current positions of the particles in the window.

// Copy the current particle positions into the OpenGL-mapped VBO so the
// host can draw them as points.
//
// Expected launch: 2D grid of 2D blocks covering a width x height domain,
// one thread per particle/vertex. Partial edge blocks are allowed — the
// guard below discards threads outside the domain, so the host may launch
// with a ceil-divided grid.
//
// positions      : mapped VBO vertex buffer, one float4 (x, y, z, w) per vertex
// _dev_particles : device particle state; only .x and .y are read here
// width, height  : logical 2D extent; both buffers must hold width*height
//                  float4 elements (precondition — TODO confirm at allocation)
__global__ void createParticles( float4* positions,

                                       float4* _dev_particles,

				       unsigned int width, unsigned int height) {

	int tidx = threadIdx.x + blockIdx.x*blockDim.x;

	int tidy = threadIdx.y + blockIdx.y*blockDim.y;

	// Bounds guard: without it, threads of a partial block write past the
	// end of the mapped VBO — out-of-bounds writes into graphics memory
	// are a classic cause of a hard crash/system hang.
	if (tidx >= (int)width || tidy >= (int)height) return;

	// Row-major flat index based on the logical width, not on
	// blockDim.x*gridDim.x — the two differ whenever the grid over-covers
	// the domain, which would scatter writes to the wrong slots.
	int gid = tidx + tidy*(int)width;

	// Assign particle position to VBO in OpenGL: (x, y, 0, 1) homogeneous
	// vertex. Single load of the particle avoids reading global memory twice.
	float4 p = _dev_particles[gid];
	positions[gid] = make_float4(p.x, p.y, 0.0f, 1.0f);

}

And here’s where the scene is drawn:

// Render one frame: draw the bounding box in immediate mode, let CUDA fill
// the particle VBO with current positions, then draw the particles as
// GL_POINTS. Registered as the GLUT display callback (it ends with
// glutSwapBuffers/glutPostRedisplay).
void drawScene() {

	int width = windowSize;

	int height = windowSize;

	// Clear information from last draw
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	glMatrixMode(GL_MODELVIEW); // Switch to the drawing perspective

	glLoadIdentity(); // Reset the drawing perspective

	glTranslatef(0.0f, 0.0f, -3.0f); // Move the camera back 3 units

	// Draw bounding box
	glBegin(GL_LINE_LOOP);
	glVertex3f(-1.0f,-1.0f, 0.0f);
	glVertex3f(-1.0f, 1.0f, 0.0f);
	glVertex3f( 1.0f, 1.0f, 0.0f);
	glVertex3f( 1.0f,-1.0f, 0.0f);
	glEnd();

	// Map buffer object for writing from CUDA. Every CUDA call is checked:
	// if the map fails, an invalid pointer would be handed to the kernel,
	// which is the classic cause of a hard crash/hang at this point.
	float4* positions = nullptr;
	checkCudaErrors(cudaGraphicsMapResources( 1, &positionsVBO_CUDA, 0 ));

	size_t size = 0;
	checkCudaErrors(cudaGraphicsResourceGetMappedPointer ( (void**)&positions, &size, positionsVBO_CUDA));

	// Execute kernel: one thread per particle, 16x16 blocks. Ceil-division
	// keeps the grid covering the whole domain even when windowSize is not
	// a multiple of the block dimensions (plain division would silently
	// drop the remainder and leave edge particles unwritten).
	dim3	dimBlock(16, 16, 1);
	dim3	dimGrid((width + dimBlock.x - 1) / dimBlock.x,
	        	(height + dimBlock.y - 1) / dimBlock.y, 1);

	createParticles<<<dimGrid, dimBlock>>>(positions, dev_particles, width, height);
	checkCudaErrors(cudaGetLastError()); // catch bad launch configuration

	// Unmap buffer object before OpenGL reads it; unmapping synchronizes
	// with the kernel so the VBO is fully written before drawing.
	checkCudaErrors(cudaGraphicsUnmapResources( 1, &positionsVBO_CUDA, 0 ));

	// Render from buffer object
	glBindBuffer( GL_ARRAY_BUFFER, positionsVBO );
	glVertexPointer( 4, GL_FLOAT, 0, 0);
	glEnableClientState (GL_VERTEX_ARRAY);
	glDrawArrays( GL_POINTS, 0, windowSize * windowSize );
	glDisableClientState( GL_VERTEX_ARRAY );
	glBindBuffer( GL_ARRAY_BUFFER, 0 ); // leave no stale binding behind

	// Swap buffers
	glutSwapBuffers(); 

	glutPostRedisplay();

}

The code compiles, but when I try to run it, the program crashes and my computer hangs. If I use test values that are calculated within the kernel itself, then there are no issues and the code runs perfectly.

My questions are:

[list=1]

[*]Is it possible to read device memory into a VBO?

[*]Is the problem due to having a single frame buffer?