cudaMemcpy from PBO to host?

I am trying to copy the pixel data from a PBO to host memory using cudaMemcpy, but I am not having any success. Perhaps I do not understand CUDA/OpenGL interoperability well enough.

Prior to rendering, I initialize the PBO and register it with CUDA:

glGenBuffersARB(PBO_COUNT, &pboId);

  glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId); 

  glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, DATA_SIZE, 0, GL_STREAM_READ_ARB); 

  cudaGLRegisterBufferObject( pboId ); 

  glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0);

…and allocate host memory:

cudaMallocHost( (void **) & pHostMemory, DATA_SIZE);

In the render loop, I first

render the scene,

then I copy the frame buffer back via the PBO:

glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId);

    glReadPixels(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);

    glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0);

Finally, I copy the PBO data from device to host:

uchar4 *dptr;

    cudaGLMapBufferObject((void**)&dptr, pboIds[nextIndex]);

    cudaMemcpy(pHostMemory, dptr, NBYTES, cudaMemcpyDeviceToHost);

    cudaGLUnmapBufferObject(pboId);

Is this correct? After the call to cudaGLMapBufferObject, what is dptr really pointing to? Is it pointing to the same memory as src in the following snippet?

glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId);

    GLubyte* src = (GLubyte*)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);

    if(src)

    {

        // mod data

        glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);     // release pointer to the mapped buffer

    }

CD

I am trying to copy the pixel data from a PBO to host memory using cudaMemcpy, but I am not having any success. Perhaps I do not understand CUDA/OpenGL interoperability well enough.

Prior to rendering, I initialize the PBO and register it with CUDA:

glGenBuffersARB(PBO_COUNT, &pboId);

  glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId); 

  glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, DATA_SIZE, 0, GL_STREAM_READ_ARB); 

  cudaGLRegisterBufferObject( pboId ); 

  glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0);

…and allocate host memory:

cudaMallocHost( (void **) & pHostMemory, DATA_SIZE);

In the render loop, I first

render the scene,

then I copy the frame buffer back via the PBO:

glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId);

    glReadPixels(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);

    glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0);

Finally, I copy the PBO data from device to host:

uchar4 *dptr;

    cudaGLMapBufferObject((void**)&dptr, pboIds[nextIndex]);

    cudaMemcpy(pHostMemory, dptr, NBYTES, cudaMemcpyDeviceToHost);

    cudaGLUnmapBufferObject(pboId);

Is this correct? After the call to cudaGLMapBufferObject, what is dptr really pointing to? Is it pointing to the same memory as src in the following snippet?

glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, pboId);

    GLubyte* src = (GLubyte*)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB);

    if(src)

    {

        // mod data

        glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB);     // release pointer to the mapped buffer

    }

CD