Hi all,
I've been trying to understand how to use CUDA + OpenGL + VBOs for graphical rendering of my data (which is continuously being modified by CUDA kernels). From what I've read, the best way to do this seems to be with a VBO. I started by following the example code in the programming guide (under 'OpenGL Interoperability', page 38) but can't get it to work properly - it compiles, runs and even displays some initialisation data (which resides on the host), but my CUDA calls weren't having any effect. So I wrapped every CUDA call in 'CUDA_SAFE_CALL' and traced the problem back to this line:
CUDA_SAFE_CALL( cudaGraphicsGLRegisterBuffer(&vboID_d, vboID_h, cudaGraphicsMapFlagsNone) ); // unspecified launch failure in prior launch
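For reference, this is the sequence I understand the guide to be describing - just a rough sketch in my own words, not my actual code (the function name and includes are only for illustration; vboID_h, vboID_d, arr_d, num_bytes and N_CELLS are my own names from the snippets below, and error checking is left out):

// assumes the usual GL/glew.h, GL/glut.h and cuda_gl_interop.h includes, plus an N_CELLS constant
GLuint vboID_h;                       // OpenGL buffer id
struct cudaGraphicsResource* vboID_d; // CUDA's handle to the same buffer

void interopSketch(int argc, char** argv)
{
    // 1) create the GL context first, then point CUDA at the device driving it
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
    glutCreateWindow("CUDA & OpenGL Simulation");
    cudaGLSetDevice(0);

    // 2) create the VBO and register it with CUDA once, at start-up
    glGenBuffers(1, &vboID_h);
    glBindBuffer(GL_ARRAY_BUFFER, vboID_h);
    glBufferData(GL_ARRAY_BUFFER, N_CELLS * 3 * sizeof(float), 0, GL_DYNAMIC_DRAW);
    cudaGraphicsGLRegisterBuffer(&vboID_d, vboID_h, cudaGraphicsMapFlagsNone);

    // 3) every frame: map, fetch the device pointer, launch the kernel, unmap, then draw
    float* arr_d = 0;
    size_t num_bytes = 0;
    cudaGraphicsMapResources(1, &vboID_d, 0);
    cudaGraphicsResourceGetMappedPointer((void**)&arr_d, &num_bytes, vboID_d);
    // ... kernel writes into arr_d ...
    cudaGraphicsUnmapResources(1, &vboID_d, 0);
    // ... glBindBuffer / glVertexPointer / glDrawArrays from vboID_h ...
}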
I don't even know whether cuda-gdb would be of any use here. I've been trying to get a simple OpenGL + CUDA program working for about a week now. I should note that I'm new to OpenGL, so I'm not entirely sure what's going on here… I adapted this code from the programming guide and from the 'nbody' SDK example (whose source code I found very confusing!). Here are some snippets of my code to illustrate what I'm attempting to do:
Note: All this code, unless specified otherwise, is in the same file and uses globals.
// Global Variables
...
GLuint vboID_h;
cudaGraphicsResource* vboID_d;
int main(int argc, char** argv){
...
initGL(argc, argv);
initVBO();
cudaGLSetDevice(0);
...
glutDisplayFunc(display);
...
glutMainLoop();
}
void initGL(int argc, char** argv)
{
// First initialize OpenGL context, so we can properly set the GL for CUDA.
// This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
glutInitWindowSize(720, 480);
glutCreateWindow("CUDA & OpenGL Simulation");
if (bFullscreen)
glutFullScreen();
GLenum err = glewInit();
if (GLEW_OK != err)
{
shrLog("GLEW Error: %s\n", glewGetErrorString(err));
exit(-1);
}
else if (!glewIsSupported("GL_VERSION_2_0 "
"GL_VERSION_1_5 "
"GL_ARB_multitexture "
"GL_ARB_vertex_buffer_object"))
{
fprintf(stderr, "Required OpenGL extensions missing.");
cudaThreadExit();
exit(-1);
}
else
{
#if defined(WIN32)
wglSwapIntervalEXT(0);
#elif defined(LINUX)
glXSwapIntervalSGI(0);
#endif
}
glEnable(GL_DEPTH_TEST);
glClearColor(0.0, 0.0, 0.0, 1.0);
checkGLErrors("initGL");
}
// Most of this code came from the Programming Guide
void initVBO(){
// Create buffer object and register it with CUDA
glGenBuffers(1, &vboID_h);
glBindBuffer(GL_ARRAY_BUFFER, vboID_h);
static unsigned int size = N_CELLS * 3 * sizeof(float);
// Just some data to start off with to see if the kernel/cudaBuffer has any effect
float* original = (float*)malloc(size);
for (int n = 0; n < N_CELLS * 3; ++n){ // loop over floats (3 per vertex), not bytes
if (n > 0 && (n+1) % 3 == 0){
original[n] = 0.0f; // every third float = z component
} else {
original[n] = (float)(n / 100.);
}
}
glBufferData(GL_ARRAY_BUFFER, size, original, GL_DYNAMIC_DRAW);
free(original);
//glBindBuffer(GL_ARRAY_BUFFER, vboID_h); // this line is in the programming guide, but it seems like it shouldn't be here, as it un-binds the first binding??
cudaThreadSynchronize();
CUDA_SAFE_CALL( cudaGraphicsGLRegisterBuffer(&vboID_d, vboID_h, cudaGraphicsMapFlagsNone) ); // <-- error here
cudaThreadSynchronize();
}
void display(){
...
float* arr_d;
size_t num_bytes;
float debug[16];
// Map buffer object
CUDA_SAFE_CALL(cudaGraphicsMapResources(1, &vboID_d, streamID));
CUDA_SAFE_CALL(cudaGraphicsResourceGetMappedPointer((void**)&arr_d, &num_bytes, vboID_d));
runCudaGL(vboID_d); // wrapper to call the kernel (which resides in a .cu file along with the kernels it calls)
// Unmap buffer object
cudaGraphicsUnmapResources(1, &vboID_d, 0);
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, vboID_h);
glVertexPointer(3, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, 16); // just 16 points for now - should eventually be N_CELLS
glDisableClientState(GL_VERTEX_ARRAY);
...
}
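For completeness, runCudaGL and the kernel it launches live in a separate .cu file and look roughly like this (a simplified sketch - the kernel body and names are placeholders, and in this sketch the wrapper takes the already-mapped device pointer arr_d plus an element count rather than the cudaGraphicsResource handle):

// placeholder kernel: writes directly into the mapped VBO memory
__global__ void updatePositions(float* pos, unsigned int nCells)
{
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < nCells){
        pos[3*i + 0] += 0.01f;  // shift x a little each frame
        pos[3*i + 1] += 0.01f;  // shift y a little each frame
        pos[3*i + 2]  = 0.0f;   // keep z at zero
    }
}

extern "C" void runCudaGL(float* arr_d, unsigned int nCells)
{
    dim3 block(256);
    dim3 grid((nCells + block.x - 1) / block.x);
    updatePositions<<<grid, block>>>(arr_d, nCells);
    cudaThreadSynchronize();  // flush the launch so any error surfaces here, not later
}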
- The nbody example didn't use 'cudaGraphicsGLRegisterBuffer'… presumably because it was written before this function existed?
- Can anyone see any obvious bugs/mistakes?