CUDA Interop for depth component, work-around.

I try to get depth value in kernel for post process. With CUDA interop, the texture can be get in kernel, but depth value show always 0. Below is my code. Can you show me why. As far as I know depth component is not support like RGB. I have to do a work-arround by mapping the depth value to texture. However I still not get the depth value.

Here is the sample code. You can see the teapot depth is drawn. But how can i pass this depth component to kernel do that I can have both depth and RGB value for postprocess.

Thanks in Advance.

main.cpp

#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <math.h>
#define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
#include <GL/glew.h>
#include <glut.h>

// CUDA headers
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

#include "Postprocess.cu"

#define SRC_BUFFER  0
#define DST_BUFFER  1
#define SRC_DEPTH   2

int g_iGLUTWindowHandle = 0;
int g_iWindowPositionX = 0;
int g_iWindowPositionY = 0;
int g_iWindowWidth = 512;
int g_iWindowHeight = 512;

int g_iImageWidth = g_iWindowWidth;
int g_iImageHeight = g_iWindowHeight;

float g_fRotate[3] = { 0.0f, 0.0f, 0.0f };  // Rotation parameter for scene object.

float g_fBlurRadius = 2.0f;                 // Radius of 2D convolution blur performed in postprocess step.

GLuint g_GLFramebuffer = 0;                  // Frame buffer object for off-screen rendering.
GLuint g_GLColorAttachment0 = 0;            // Color texture to attach to frame buffer object.
GLuint g_GLDepthAttachment = 0;             // Depth buffer to attach to frame buffer object.
GLuint depthTexture	= 0;

GLuint g_GLPostprocessTexture = 0;          // This is where the result of the post-process effect will go.
                                            // This is also the final texture that will be blit to the back buffer for viewing.

// The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
// buffer that can be used in a CUDA kernel.
// We need 2 resource: One will be used to map to the color attachment of the
//   framebuffer and used read-only from the CUDA kernel (SRC_BUFFER), 
//   the second is used to write the postprocess effect to (DST_BUFFER).
cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0,  0, 0};   

using namespace std;

// Initialize OpenGL/GLUT
bool InitGL( int argc, char* argv[] );
// Initialize CUDA for OpenGL
void InitCUDA();
// Render a texture object to the current framebuffer
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );

// Create a framebuffer object that is used for offscreen rendering.
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthTexture );
void DeleteFramebuffer( GLuint& framebuffer );

void CreatePBO( GLuint& bufferID, size_t size );
void DeletePBO( GLuint& bufferID );

void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
void DeleteTexture( GLuint& texture );

// Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );

void IdleGL();
void DisplayGL();
void KeyboardGL( unsigned char key, int x, int y );
void ReshapeGL( int w, int h );

void Cleanup( int errorCode, bool bExit = true )
{
    if ( g_iGLUTWindowHandle != 0 )
    {
        glutDestroyWindow( g_iGLUTWindowHandle );
        g_iGLUTWindowHandle = 0;
    }
    if ( bExit )
    {
        exit( errorCode );
    }
}

// Create a texture resource for rendering to.
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
{
    // Make sure we don't already have a texture defined here
    DeleteTexture( texture );

    glGenTextures( 1, &texture );
    glBindTexture( GL_TEXTURE_2D, texture );

    // set basic parameters
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Create texture data (4-component unsigned byte)
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );

    // Unbind the texture
    glBindTexture( GL_TEXTURE_2D, 0 );
}

void DeleteTexture( GLuint& texture )
{
    if ( texture != 0 )
    {
        glDeleteTextures(1, &texture );
        texture = 0;
    }
}

void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
{
	DeleteTexture(txDepth);
	
	glGenTextures(1, &txDepth);
	glBindTexture(GL_TEXTURE_2D, txDepth);
	
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	
	
	glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT32, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);
	
	glBindTexture(GL_TEXTURE_2D, 0);
}

void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthTexture )
{
    // Delete the existing framebuffer if it exists.
    DeleteFramebuffer( framebuffer );

    glGenFramebuffers( 1, &framebuffer );
    glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );
	
	// Attach the texture to FBO depth attachment point
    glBindTexture(GL_TEXTURE_2D, depthTexture);
   glFramebufferTexture( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthTexture, 0 );

	glBindTexture(GL_TEXTURE_2D, colorAttachment0);
    glFramebufferTexture( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, colorAttachment0, 0 );

// Check to see if the frame buffer is valid
    GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
    if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
    {
        std::cerr << "ERROR: Incomplete framebuffer status." << std::endl;
    }

    // Unbind the frame buffer
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );
}

void DeleteFramebuffer( GLuint& framebuffer )
{
    if ( framebuffer != 0 )
    {
        glDeleteFramebuffers( 1, &framebuffer );
        framebuffer = 0;
    }
}

void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
{
    // Map the GL texture resource with the CUDA resource
    cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
}

void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
{
    if ( cudaResource != 0 )
    {
        cudaGraphicsUnregisterResource( cudaResource );
        cudaResource = 0;
    }
}

int main( int argc, char* argv[] )
{
    glutInit(&argc, argv);
  
    // Init GLUT
    if ( !InitGL( argc, argv ) )
    {
        std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
    }

  //  InitCUDA();
  
    // Startup our GL render loop
    glutMainLoop();

}

bool InitGL( int argc, char* argv[] )
{
 
    int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
    int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);

    glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
    glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
        iScreenHeight / 2 - g_iWindowHeight / 2 );
    glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );

    g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );

    // Register GLUT callbacks
    glutDisplayFunc(DisplayGL);
    glutKeyboardFunc(KeyboardGL);
    glutReshapeFunc(ReshapeGL);
    glutIdleFunc(IdleGL);

    // Init GLEW
    glewInit();
    GLboolean gGLEW = glewIsSupported(
        "GL_VERSION_3_1 " 
        "GL_ARB_pixel_buffer_object "
        "GL_ARB_framebuffer_object "
        "GL_ARB_copy_buffer " 
        );

    int maxAttachemnts = 0;
    glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );

    if ( !gGLEW ) return false;

    glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );

    // Setup the viewport
    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    // Setup the projection matrix
    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();

    gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.0, 1.0 );
    glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );

    return true;
}

void InitCUDA()
{
    // We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
    cudaGLSetGLDevice(0);
}

void IdleGL()
{
    glutPostRedisplay();
}

// Render the initial scene
void RenderScene()
{
    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

    glMatrixMode( GL_PROJECTION );
    glLoadIdentity();
    gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );

    glMatrixMode( GL_MODELVIEW );
    glLoadIdentity();
    glTranslatef( 0.0f, 0.0f, -4.0f );

    glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

    glEnable( GL_LIGHTING );
    glEnable( GL_DEPTH_TEST );
    glDepthFunc( GL_LESS );

gluLookAt(	0,   0, 1.2,	//	eye pos
				50, 0,  1,	//	aim point
				0,   0, 1);	//	up direction
    glutSolidTeapot( 1.0 );

}

// Perform a post-process effect on the current framebuffer (back buffer)
void Postprocess()
{     	
	glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
	glBindTexture( GL_TEXTURE_2D, depthTexture);

	glCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, 0, 0, g_iImageWidth, g_iImageHeight, 0 );

       glBindTexture( GL_TEXTURE_2D, 0 );
       glBindFramebuffer( GL_FRAMEBUFFER, 0 );

 //   PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );

}

void DisplayGL()
{
    // Bind the framebuffer that we want to use as the render target.
    glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
    RenderScene();
    // Unbind the framebuffer so we render to the back buffer again.
    glBindFramebuffer( GL_FRAMEBUFFER, 0 );

    Postprocess();

    // Blit the image full-screen
 //   DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );
	//depthTexture
 	DisplayImage( depthTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );

    glutSwapBuffers();
    glutPostRedisplay();

}

void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
{
    glBindTexture(GL_TEXTURE_2D, texture);
    glEnable(GL_TEXTURE_2D);
    glDisable(GL_DEPTH_TEST);

    glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();
    glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

    glMatrixMode( GL_MODELVIEW);
    glLoadIdentity();

    glPushAttrib( GL_VIEWPORT_BIT );
    glViewport(x, y, width, height );

    glBegin(GL_QUADS);
    glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
    glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
    glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
    glEnd();

    glPopAttrib();

    glMatrixMode(GL_PROJECTION);
    glPopMatrix();

    glDisable(GL_TEXTURE_2D);
}

void KeyboardGL( unsigned char key, int x, int y )
{
    switch( key )
    {
    case '

#include <stdio.h>
#include <stdlib.h>
#include
#include
#include
#include <math.h>
#define GLEW_STATIC // Specify GLEW_STATIC to use the static linked library (.lib) instead of the dynamic linked library (.dll) for GLEW
#include <GL/glew.h>
#include <glut.h>

// CUDA headers
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

#include “Postprocess.cu”

#define SRC_BUFFER 0
#define DST_BUFFER 1
#define SRC_DEPTH 2

int g_iGLUTWindowHandle = 0;
int g_iWindowPositionX = 0;
int g_iWindowPositionY = 0;
int g_iWindowWidth = 512;
int g_iWindowHeight = 512;

int g_iImageWidth = g_iWindowWidth;
int g_iImageHeight = g_iWindowHeight;

float g_fRotate[3] = { 0.0f, 0.0f, 0.0f }; // Rotation parameter for scene object.

float g_fBlurRadius = 2.0f; // Radius of 2D convolution blur performed in postprocess step.

GLuint g_GLFramebuffer = 0; // Frame buffer object for off-screen rendering.
GLuint g_GLColorAttachment0 = 0; // Color texture to attach to frame buffer object.
GLuint g_GLDepthAttachment = 0; // Depth buffer to attach to frame buffer object.
GLuint depthTexture = 0;

GLuint g_GLPostprocessTexture = 0; // This is where the result of the post-process effect will go.
// This is also the final texture that will be blit to the back buffer for viewing.

// The CUDA Graphics Resource is used to map the OpenGL texture to a CUDA
// buffer that can be used in a CUDA kernel.
// We need 2 resource: One will be used to map to the color attachment of the
// framebuffer and used read-only from the CUDA kernel (SRC_BUFFER),
// the second is used to write the postprocess effect to (DST_BUFFER).
cudaGraphicsResource_t g_CUDAGraphicsResource[3] = { 0, 0, 0};

using namespace std;

// Initialize OpenGL/GLUT
bool InitGL( int argc, char* argv );
// Initialize CUDA for OpenGL
void InitCUDA();
// Render a texture object to the current framebuffer
void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height );

// Create a framebuffer object that is used for offscreen rendering.
void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthTexture );
void DeleteFramebuffer( GLuint& framebuffer );

void CreatePBO( GLuint& bufferID, size_t size );
void DeletePBO( GLuint& bufferID );

void CreateTexture( GLuint& texture, unsigned int width, unsigned int height );
void DeleteTexture( GLuint& texture );

// Links a OpenGL texture object to a CUDA resource that can be used in the CUDA kernel.
void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags );
void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource );

void IdleGL();
void DisplayGL();
void KeyboardGL( unsigned char key, int x, int y );
void ReshapeGL( int w, int h );

void Cleanup( int errorCode, bool bExit = true )
{
if ( g_iGLUTWindowHandle != 0 )
{
glutDestroyWindow( g_iGLUTWindowHandle );
g_iGLUTWindowHandle = 0;
}
if ( bExit )
{
exit( errorCode );
}
}

// Create a texture resource for rendering to.
void CreateTexture( GLuint& texture, unsigned int width, unsigned int height )
{
// Make sure we don’t already have a texture defined here
DeleteTexture( texture );

glGenTextures( 1, &texture );
glBindTexture( GL_TEXTURE_2D, texture );

// set basic parameters
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

// Create texture data (4-component unsigned byte)
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );

// Unbind the texture
glBindTexture( GL_TEXTURE_2D, 0 );

}

void DeleteTexture( GLuint& texture )
{
if ( texture != 0 )
{
glDeleteTextures(1, &texture );
texture = 0;
}
}

void CreateDepthTexture(GLuint& txDepth, unsigned int width, unsigned int height)
{
DeleteTexture(txDepth);

glGenTextures(1, &txDepth);
glBindTexture(GL_TEXTURE_2D, txDepth);

glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);	

glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT32, width, height, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL);

glBindTexture(GL_TEXTURE_2D, 0);

}

void CreateFramebuffer( GLuint& framebuffer, GLuint colorAttachment0, GLuint depthTexture )
{
// Delete the existing framebuffer if it exists.
DeleteFramebuffer( framebuffer );

glGenFramebuffers( 1, &framebuffer );
glBindFramebuffer( GL_FRAMEBUFFER, framebuffer );

// Attach the texture to FBO depth attachment point
glBindTexture(GL_TEXTURE_2D, depthTexture);

glFramebufferTexture( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthTexture, 0 );

glBindTexture(GL_TEXTURE_2D, colorAttachment0);
glFramebufferTexture( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, colorAttachment0, 0 );

// Check to see if the frame buffer is valid
GLenum fboStatus = glCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( fboStatus != GL_FRAMEBUFFER_COMPLETE )
{
std::cerr << “ERROR: Incomplete framebuffer status.” << std::endl;
}

// Unbind the frame buffer
glBindFramebuffer( GL_FRAMEBUFFER, 0 );

}

void DeleteFramebuffer( GLuint& framebuffer )
{
if ( framebuffer != 0 )
{
glDeleteFramebuffers( 1, &framebuffer );
framebuffer = 0;
}
}

void CreateCUDAResource( cudaGraphicsResource_t& cudaResource, GLuint GLtexture, cudaGraphicsMapFlags mapFlags )
{
// Map the GL texture resource with the CUDA resource
cudaGraphicsGLRegisterImage( &cudaResource, GLtexture, GL_TEXTURE_2D, mapFlags );
}

void DeleteCUDAResource( cudaGraphicsResource_t& cudaResource )
{
if ( cudaResource != 0 )
{
cudaGraphicsUnregisterResource( cudaResource );
cudaResource = 0;
}
}

int main( int argc, char* argv )
{
glutInit(&argc, argv);

// Init GLUT
if ( !InitGL( argc, argv ) )
{
    std::cerr << "ERROR: Failed to initialize OpenGL" << std::endl;
}

// InitCUDA();

// Startup our GL render loop
glutMainLoop();

}

bool InitGL( int argc, char* argv )
{

int iScreenWidth = glutGet(GLUT_SCREEN_WIDTH);
int iScreenHeight = glutGet(GLUT_SCREEN_HEIGHT);

glutInitDisplayMode( GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH );
glutInitWindowPosition( iScreenWidth / 2 - g_iWindowWidth / 2,
    iScreenHeight / 2 - g_iWindowHeight / 2 );
glutInitWindowSize( g_iWindowWidth, g_iWindowHeight );

g_iGLUTWindowHandle = glutCreateWindow( "Postprocess GL" );

// Register GLUT callbacks
glutDisplayFunc(DisplayGL);
glutKeyboardFunc(KeyboardGL);
glutReshapeFunc(ReshapeGL);
glutIdleFunc(IdleGL);

// Init GLEW
glewInit();
GLboolean gGLEW = glewIsSupported(
    "GL_VERSION_3_1 " 
    "GL_ARB_pixel_buffer_object "
    "GL_ARB_framebuffer_object "
    "GL_ARB_copy_buffer " 
    );

int maxAttachemnts = 0;
glGetIntegerv( GL_MAX_COLOR_ATTACHMENTS, &maxAttachemnts );

if ( !gGLEW ) return false;

glClearColor( 1.0f, 1.0f, 1.0f, 1.0f );

// Setup the viewport
glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

// Setup the projection matrix
glMatrixMode( GL_PROJECTION );
glLoadIdentity();

gluPerspective( 60.0, (GLdouble)g_iWindowWidth/(GLdouble)g_iWindowHeight, 0.0, 1.0 );
glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );

return true;

}

void InitCUDA()
{
// We have to call cudaGLSetGLDevice if we want to use OpenGL interoperability.
cudaGLSetGLDevice(0);
}

void IdleGL()
{
glutPostRedisplay();
}

// Render the initial scene
void RenderScene()
{
glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

glMatrixMode( GL_PROJECTION );
glLoadIdentity();
gluPerspective( 60.0, (GLdouble)g_iWindowWidth / (GLdouble)g_iWindowHeight, 0.1, 10.0 );

glMatrixMode( GL_MODELVIEW );
glLoadIdentity();
glTranslatef( 0.0f, 0.0f, -4.0f );

glViewport( 0, 0, g_iWindowWidth, g_iWindowHeight );

glEnable( GL_LIGHTING );
glEnable( GL_DEPTH_TEST );
glDepthFunc( GL_LESS );

gluLookAt( 0, 0, 1.2, // eye pos
50, 0, 1, // aim point
0, 0, 1); // up direction
glutSolidTeapot( 1.0 );

}

// Perform a post-process effect on the current framebuffer (back buffer)
void Postprocess()
{
glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
glBindTexture( GL_TEXTURE_2D, depthTexture);

glCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, 0, 0, g_iImageWidth, g_iImageHeight, 0 );

   glBindTexture( GL_TEXTURE_2D, 0 );
   glBindFramebuffer( GL_FRAMEBUFFER, 0 );

// PostprocessCUDA( g_CUDAGraphicsResource[DST_BUFFER], g_CUDAGraphicsResource[SRC_BUFFER], g_CUDAGraphicsResource[SRC_DEPTH] , g_iImageWidth, g_iImageHeight );

}

void DisplayGL()
{
// Bind the framebuffer that we want to use as the render target.
glBindFramebuffer( GL_FRAMEBUFFER, g_GLFramebuffer );
RenderScene();
// Unbind the framebuffer so we render to the back buffer again.
glBindFramebuffer( GL_FRAMEBUFFER, 0 );

Postprocess();

// Blit the image full-screen

// DisplayImage( g_GLPostprocessTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );
//depthTexture
DisplayImage( depthTexture, 0, 0, g_iWindowWidth, g_iWindowHeight );

glutSwapBuffers();
glutPostRedisplay();

}

void DisplayImage( GLuint texture, unsigned int x, unsigned int y, unsigned int width, unsigned int height )
{
glBindTexture(GL_TEXTURE_2D, texture);
glEnable(GL_TEXTURE_2D);
glDisable(GL_DEPTH_TEST);

glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

glMatrixMode(GL_PROJECTION);
glPushMatrix();
glLoadIdentity();
glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

glMatrixMode( GL_MODELVIEW);
glLoadIdentity();

glPushAttrib( GL_VIEWPORT_BIT );
glViewport(x, y, width, height );

glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, -1.0, 0.5);
glTexCoord2f(1.0, 0.0); glVertex3f(1.0, -1.0, 0.5);
glTexCoord2f(1.0, 1.0); glVertex3f(1.0, 1.0, 0.5);
glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, 1.0, 0.5);
glEnd();

glPopAttrib();

glMatrixMode(GL_PROJECTION);
glPopMatrix();

glDisable(GL_TEXTURE_2D);

}

void KeyboardGL( unsigned char key, int x, int y )
{
switch( key )
{
case ‘\033’: // escape quits
case ‘Q’: // Q quits
case ‘q’: // q quits
{
// Cleanup up and quit
Cleanup(0);
}
break;
}

glutPostRedisplay();

}

void ReshapeGL( int w, int h )
{
h = std::max(h, 1);

g_iWindowWidth = w;
g_iWindowHeight = h;

g_iImageWidth = w;
g_iImageHeight = h;

// Create a surface texture to render the scene to.
CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);

// Attach the color and depth textures to the framebuffer.
CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0,  depthTexture);

// Create a texture to render the post-process effect to.
CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );

// Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.

// CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
// CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
// Map the post-process texture to the CUDA resource so it can be
// written in the kernel.
// CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );

glutPostRedisplay();

}

33': // escape quits
    case 'Q':    // Q quits
    case 'q':    // q quits
        {
            // Cleanup up and quit
            Cleanup(0);
        }
        break;
    }

    glutPostRedisplay();
}

void ReshapeGL( int w, int h )
{
    h = std::max(h, 1);

    g_iWindowWidth = w;
    g_iWindowHeight = h;

    g_iImageWidth = w;
    g_iImageHeight = h;

    // Create a surface texture to render the scene to.
    CreateTexture( g_GLColorAttachment0, g_iImageWidth, g_iImageHeight );
    CreateDepthTexture( depthTexture, g_iImageWidth, g_iImageHeight);

    // Attach the color and depth textures to the framebuffer.
    CreateFramebuffer( g_GLFramebuffer, g_GLColorAttachment0,  depthTexture);

    // Create a texture to render the post-process effect to.
    CreateTexture( g_GLPostprocessTexture, g_iImageWidth, g_iImageHeight );

    // Map the color attachment to a CUDA graphics resource so we can read it in a CUDA a kernel.
//    CreateCUDAResource( g_CUDAGraphicsResource[SRC_BUFFER], g_GLColorAttachment0, cudaGraphicsMapFlagsReadOnly );
 //   CreateCUDAResource( g_CUDAGraphicsResource[SRC_DEPTH], depthTexture, cudaGraphicsMapFlagsReadOnly );
    // Map the post-process texture to the CUDA resource so it can be 
    // written in the kernel.
//    CreateCUDAResource( g_CUDAGraphicsResource[DST_BUFFER], g_GLPostprocessTexture, cudaGraphicsMapFlagsWriteDiscard );

    glutPostRedisplay();
}

The Postprocess.cu

#include <cuda_runtime_api.h>
#include "Postprocess.h"

#define BLOCK_SIZE 16     // block size

texture<uchar4, cudaTextureType2D, cudaReadModeElementType> texRef;
texture<float, cudaTextureType2D, cudaReadModeElementType> depRef;

__global__ void PostprocessKernel( uchar4* dst, unsigned int imgWidth, unsigned int imgHeight )
{
    unsigned int tx = threadIdx.x;
    unsigned int ty = threadIdx.y;
    unsigned int bw = blockDim.x;
    unsigned int bh = blockDim.y;
    // Non-normalized U, V coordinates of input texture for current thread.
    unsigned int u = ( bw * blockIdx.x ) + tx;
    unsigned int v = ( bh * blockIdx.y ) + ty;

    // Early-out if we are beyond the texture coordinates for our texture.
    if ( u > imgWidth || v > imgHeight ) return;

    unsigned int index = ( v * imgWidth ) + u;
    uchar4 color = tex2D( texRef, u, v );
    float depth = tex2D(depRef, u, v);

    dst[index] = make_uchar4( color.x, color.y, color.z, 1);
    
 //  dst[index] = make_uchar4( depth*255, depth*255, depth*255, 1);
   }

uchar4* g_dstBuffer = NULL;
size_t g_BufferSize = 0; 

void PostprocessCUDA( cudaGraphicsResource_t& dst, cudaGraphicsResource_t& src, cudaGraphicsResource_t& srcDepth,  unsigned int width, unsigned int height)
{

  //  cudaGraphicsResource_t resources[3] = { src, srcDepth, dst };

    // Map the resources so they can be used in the kernel.
   cudaGraphicsMapResources( 1, &src ) ;
   cudaGraphicsMapResources( 1, &srcDepth ) ;
   cudaGraphicsMapResources(1, &dst ) ;

	
   cudaArray* srcArray;
   cudaArray* dstArray;   
   cudaArray* srcDepthArray;   

    // Get a device pointer to the OpenGL buffers
   cudaGraphicsSubResourceGetMappedArray( &srcArray, src, 0, 0 ) ;
   cudaGraphicsSubResourceGetMappedArray( &srcDepthArray, srcDepth, 0, 0 ) ;
   cudaGraphicsSubResourceGetMappedArray( &dstArray, dst, 0, 0 ) ;
 
    // Map the source texture to a texture reference.
     cudaBindTextureToArray( texRef, srcArray );
     cudaBindTextureToArray( depRef, srcDepthArray );
     
     // Destination buffer to store the result of the postprocess effect.
    size_t bufferSize = width * height * sizeof(uchar4);
    if ( g_BufferSize != bufferSize )
    {
        if ( g_dstBuffer != NULL )
        {
            cudaFree( g_dstBuffer );
        }
        // Only re-allocate the global memory buffer if the screen size changes, 
        // or it has never been allocated before (g_BufferSize is still 0)
        g_BufferSize = bufferSize;
        cudaMalloc( &g_dstBuffer, g_BufferSize );
    }

    // Compute the grid size
    size_t blocksW = (size_t)ceilf( width / (float)BLOCK_SIZE );
    size_t blocksH = (size_t)ceilf( height / (float)BLOCK_SIZE );
    dim3 gridDim( blocksW, blocksH, 1 );
    dim3 blockDim( BLOCK_SIZE, BLOCK_SIZE, 1 );

    PostprocessKernel<<< gridDim, blockDim >>>( g_dstBuffer, width, height );

    // Copy the destination back to the source array
    cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice  );

    // Unbind the texture reference
    cudaUnbindTexture( texRef);
    cudaUnbindTexture( depRef);
    
    // Unmap the resources again so the texture can be rendered in OpenGL
     cudaGraphicsUnmapResources( 1, &src ) ;
    cudaGraphicsUnmapResources( 1, &srcDepth ) ;
    cudaGraphicsUnmapResources( 1, &dst ) ;

}

And the CMakeLists.txt

cmake_minimum_required (VERSION 2.8)
project (PostprocessLinux)

find_package(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
link_directories(${PROJECT_SOURCE_DIR}/lib)

cuda_include_directories("/usr/local/cuda-6.5/include")
link_directories("/usr/local/cuda-6.5/lib64")

cuda_compile(Postprocess_O, Postprocess.cu)
cuda_compile(Main_O main.cu)

cuda_add_executable (test ${Postprocess_O} ${Main_O} )

target_link_libraries(test "-lglut" "-lGL" "-lGLU" "-lGLEW" "-lpthread" "-lcudart")

INSTALL(TARGETS test RUNTIME DESTINATION bin LIBRARY DESTINATION lib)