My application is trying to set an OpenGL depth texture directly from the results of a CUDA kernel. The kernel renders into a buffer bound as GL_PIXEL_UNPACK_BUFFER, which is mapped for CUDA access with cudaGraphicsMapResources. When I try to upload the buffer contents to the depth texture, I trigger the following warning.
Buffer performance warning: Buffer object 1 (bound to GL_PIXEL_UNPACK_BUFFER_ARB, usage hint is GL_STREAM_DRAW) is being copied/moved from VIDEO memory to HOST memory.
Here is a simplified outline of my application, where the CUDA rendering step has been replaced with a simple cudaMemset. The function init_gl() should be called once the OpenGL context has been initialized, and then work() is called in a loop.
// Dimensions shared by the depth texture and the PBO (one float per texel).
const int width = 256;
const int height = 256;
// CUDA handle for the registered PBO; created in init_gl(), mapped/unmapped
// every frame in work().
cudaGraphicsResource_t graphics_resource = 0;
// GL_DEPTH_COMPONENT32F texture that work() refreshes from the PBO.
GLuint depth_texture_id = 0;
// Pixel unpack buffer that CUDA writes into device-side.
GLuint pbo_id = 0;
void init_gl() {
// initialize depth texture
glGenTextures(1, &depth_texture_id);
glBindTexture(GL_TEXTURE_2D, depth_texture_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
glTexImage2D(GL_TEXTURE_2D,
/*level=*/0,
/*internal_format=*/GL_DEPTH_COMPONENT32F, width, height,
/*border=*/0,
/*format=*/GL_DEPTH_COMPONENT,
/*type=*/GL_FLOAT,
/*data=*/nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
// initialize PBO
glGenBuffers(1, &pbo_id);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height * sizeof(float),
nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// register the PBO with cuda
CUDACHECK(cudaGraphicsGLRegisterBuffer(
&graphics_resource, pbo_id, cudaGraphicsRegisterFlagsWriteDiscard));
}
void work() {
// map the pbo
CUDACHECK(cudaGraphicsMapResources(1, &graphics_resource));
size_t bufsize = 0;
void* device_ptr = nullptr;
CUDACHECK(cudaGraphicsResourceGetMappedPointer(&device_ptr, &bufsize,
graphics_resource));
// write to the pbo
cudaMemset(device_ptr, 0, bufsize);
// unmap
CUDACHECK(cudaGraphicsUnmapResources(1, &graphics_resource));
device_ptr = nullptr;
// upload pbo to the texture
glPixelStorei(GL_PACK_ALIGNMENT, 1);
glBindTexture(GL_TEXTURE_2D, depth_texture_id);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
glTexSubImage2D(GL_TEXTURE_2D,
/*level=*/0,
/*xoffset=*/0,
/*yoffset=*/0, width, height,
/*format=*/GL_DEPTH_COMPONENT,
/*type=*/GL_FLOAT,
/*data=*/nullptr);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glBindTexture(GL_TEXTURE_2D, 0);
}
I would like to avoid copying the pixel buffer to host memory, and it’s unclear why the driver is doing this copy. This issue does not seem to occur for color buffers (GL_RGB).