I’m having a little trouble implementing OpenGL and CUDA interoperability in an event-driven molecular dynamics code that I am working on. The kernel below is called from the OpenGL main loop. The idea is to read the data in the device memory in order to show the current positions of the particles in the window.
// Copies the x/y components of each entry of _dev_particles into the
// corresponding entry of positions as (x, y, 0, 1).
//
// Launch layout: a 2D grid of 2D blocks. Every thread is flattened to a single
// index gid = tidx + tidy * (blockDim.x * gridDim.x); threads whose gid is
// >= width * height return without touching memory, so the launch may contain
// more threads than elements.
//
// Parameters:
//   positions       destination array, written at [gid]
//   _dev_particles  source array, read at [gid] (only .x and .y are used)
//   width, height   width * height is the number of elements processed.
//                   NOTE(review): both arrays must hold at least width * height
//                   elements -- confirm against the allocation sites.
__global__ void createParticles( float4* positions,
                                 float4* _dev_particles,
                                 unsigned int width, unsigned int height) {
    const size_t tidx = threadIdx.x + (size_t)blockIdx.x * blockDim.x;
    const size_t tidy = threadIdx.y + (size_t)blockIdx.y * blockDim.y;

    // Row stride is the total number of threads along x in this launch.
    const size_t gid = tidx + tidy * ((size_t)blockDim.x * gridDim.x);

    // Guard the tail: without this, surplus threads read/write out of bounds.
    const size_t count = (size_t)width * height;
    if (gid >= count) {
        return;
    }

    // Load the particle once, then assign its position to the VBO in OpenGL.
    const float4 particle = _dev_particles[gid];
    positions[gid] = make_float4(particle.x, particle.y, 0.0f, 1.0f);
}
And here’s where the scene is drawn:
// Renders one frame: draws a square outline, fills the shared vertex buffer
// from device memory with the createParticles kernel, draws the buffer as
// points, swaps buffers and requests another redisplay.
//
// Reads the file-level names windowSize, positionsVBO, positionsVBO_CUDA and
// dev_particles. Every CUDA runtime call is routed through checkCudaErrors so
// that a failure is reported at the call that caused it instead of surfacing
// later as an unrelated error.
void drawScene() {
    int width = windowSize;
    int height = windowSize;

    // Clear information from last draw
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_MODELVIEW); // Switch to the drawing perspective
    glLoadIdentity();           // Reset the drawing perspective
    glTranslatef(0.0f, 0.0f, -3.0f); // Translate the scene by -3 along Z

    // Draw bounding box
    glBegin(GL_LINE_LOOP);
    glVertex3f(-1.0f,-1.0f, 0.0f);
    glVertex3f(-1.0f, 1.0f, 0.0f);
    glVertex3f( 1.0f, 1.0f, 0.0f);
    glVertex3f( 1.0f,-1.0f, 0.0f);
    glEnd();

    // Map buffer object for writing from CUDA
    float4* positions;
    size_t size;
    checkCudaErrors(cudaGraphicsMapResources( 1, &positionsVBO_CUDA, 0 ));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer ( (void**)&positions, &size, positionsVBO_CUDA));

    // Execute kernel to fill the vertices.
    // Integer division: if windowSize is not a multiple of the block
    // dimensions, the remainder is not covered by this launch.
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid(width/dimBlock.x, height/dimBlock.y, 1);
    createParticles<<<dimGrid, dimBlock>>>(positions, dev_particles, width, height);
    // A kernel launch returns no status itself; query it explicitly.
    checkCudaErrors(cudaGetLastError());

    // Unmap buffer object
    checkCudaErrors(cudaGraphicsUnmapResources( 1, &positionsVBO_CUDA, 0 ));

    // Render from buffer object
    glBindBuffer( GL_ARRAY_BUFFER, positionsVBO );
    glVertexPointer( 4, GL_FLOAT, 0, 0);
    glEnableClientState (GL_VERTEX_ARRAY);
    glDrawArrays( GL_POINTS, 0, windowSize * windowSize );
    glDisableClientState( GL_VERTEX_ARRAY );

    // Swap buffers
    glutSwapBuffers();
    glutPostRedisplay();
}
The code compiles, but when I try to run it, the program crashes and my computer hangs. If I use test values that are calculated within the kernel itself, then there are no issues and the code runs perfectly.
My questions are:
[list=1]
[*]Is it possible to read device memory into a VBO?
[*]Is the problem due to having a single frame buffer?