Short Version
Does the CUDA driver do anything explicit to modify OpenGL state with respect to textures? I’ve got the code set up to either perform device → device transfers for CUDA, or (non-CUDA) just call OpenGL’s glTexImage2D.
I had gotten everything set up to use BGRA, but the CUDA version somehow gets messed up, whereas the CPU version works out just fine (since glTexImage2D re-specifies the storage on every upload). If I turn the knobs to make everything RGBA (including reordering the actual texture source storage), both work exactly as expected.
Long Version
There’s a lot more going on here, and it’s quite possible there is a bug somewhere else. But I can’t help but feel like the cuda graphics resource is somehow changing the state associated with the texture. Sorry if this is too much information:
// Helper struct that gets passed around; bundles one video feed's GL texture
// with its CUDA interop handles.
struct DataFeed {
// Texture unit passed to glActiveTexture. NOTE(review): glActiveTexture
// expects GL_TEXTURE0 + i, so the default of 0 is not a valid unit enum --
// TODO confirm this is assigned a GL_TEXTURE0-based value before use.
GLenum mTexUnit = 0;
// GL texture object that receives each frame.
GLuint mTex = 0;
#if defined(USE_CUDA)
// cudaArray backing mTex's level 0 while the graphics resource is mapped.
// NOTE(review): per the CUDA docs this handle is only valid while mapped and
// may change on every map -- re-fetch it after each cudaGraphicsMapResources
// rather than relying on a cached value.
cudaArray *mCudaTexArray = nullptr;
// CUDA handle for mTex, obtained from cudaGraphicsGLRegisterImage.
cudaGraphicsResource_t mCudaTexGraphicsResource = nullptr;
// Stream on which this feed's copies are issued (nullptr = default stream).
cudaStream_t mStream = nullptr;
#endif
};
Upon creation of the viewing program, everything is set up and the textures are allocated once. As far as I understand it, this first glTexImage2D call is what should define BGRA for both OpenGL and CUDA.
// NOTE(review): selecting the active unit is not needed just to create and
// configure the texture object (binding alone suffices). Also, glActiveTexture
// takes GL_TEXTURE0 + i -- a raw 0 here would be an invalid enum; confirm
// mTexUnit is assigned a GL_TEXTURE0-based value before this runs.
glActiveTexture(feed.mTexUnit);// i don't think this is necessary here
glGenTextures(1, &feed.mTex);
glBindTexture(GL_TEXTURE_2D, feed.mTex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Allocate (and zero) the texture storage.
// IMPORTANT(review): only internal_format (GL_RGBA) defines how texels are
// actually stored in the texture. format/gl_type (GL_BGRA/GL_UNSIGNED_BYTE)
// merely describe *client* data handed to glTexImage2D, and the driver
// converts during upload. A raw CUDA copy into the registered array performs
// no such conversion -- CUDA just sees RGBA8 storage -- which would explain
// swapped channels on the CUDA path when the source bytes are BGRA-ordered.
glTexImage2D(
GL_TEXTURE_2D, 0, Params::fmt::internal_format, // internal_format := GL_RGBA
Params::width, Params::height, 0,
Params::fmt::format, Params::fmt::gl_type, nullptr // format := GL_BGRA,
// gl_type := GL_UNSIGNED_BYTE
);
#if defined(USE_CUDA)
// Register the GL texture with CUDA so frames can be copied into it on-device.
checkCudaExcept(cudaGraphicsGLRegisterImage(
&feed.mCudaTexGraphicsResource,
feed.mTex,
GL_TEXTURE_2D,
// we perform device -> device copies over the texture, where the
// texture memory is sent to CUDA first (memcpy elsewhere), so when this
// resource is used it's OK to obliterate
cudaGraphicsRegisterFlagsWriteDiscard
));
// Map the resource so the backing array can be queried
checkCudaExcept(cudaGraphicsMapResources(
1, &feed.mCudaTexGraphicsResource, feed.mStream
));
// Fetch the cudaArray backing mip level 0 of the texture.
// WARNING(review): per the CUDA runtime docs, the array returned here is
// only valid while the resource is mapped, and "may change every time that
// resource is mapped". Any later use must re-map and re-fetch the array
// rather than rely on this cached handle surviving the unmap below.
checkCudaExcept(cudaGraphicsSubResourceGetMappedArray(
&feed.mCudaTexArray,
feed.mCudaTexGraphicsResource,
0,// array index
0 // mip map level
));
// Unmap the resource (invalidates the array handle fetched above)
checkCudaExcept(cudaGraphicsUnmapResources(
1, &feed.mCudaTexGraphicsResource, feed.mStream
));
#endif // USE_CUDA
So during the actual application drawing, things look like this
mSharedFeedShader.bind();
// NOTE(review): glActiveTexture expects GL_TEXTURE0 + i; confirm mTexUnit is
// a GL_TEXTURE0-based enum (the struct default of 0 is invalid here).
glActiveTexture(curr_feed->mTexUnit);
// setup some shader uniforms etc
glBindTexture(GL_TEXTURE_2D, curr_feed->mTex);
// if curr_data is not nullptr, then a new frame needs to be copied
if(curr_data) {
#if defined(USE_CUDA)
    // Map the resource for this frame's copy. The cudaArray handle returned
    // by cudaGraphicsSubResourceGetMappedArray is only guaranteed valid while
    // the resource is mapped and may change between maps, so it must be
    // re-fetched here every frame instead of using a handle cached at setup.
    checkCudaExcept(cudaGraphicsMapResources(
        1, &curr_feed->mCudaTexGraphicsResource, curr_feed->mStream
    ));
    checkCudaExcept(cudaGraphicsSubResourceGetMappedArray(
        &curr_feed->mCudaTexArray,
        curr_feed->mCudaTexGraphicsResource,
        0, // array index
        0  // mip map level
    ));
    // cudaMemcpyToArrayAsync is deprecated; use the 2D variant with an
    // explicit row pitch. NOTE(review): this is a raw byte copy -- unlike
    // glTexImage2D with format GL_BGRA, no BGRA->RGBA conversion happens, so
    // curr_data must already match the channel order of the texture's
    // internal format (RGBA here), or the GL texture swizzle must compensate.
    const size_t rowBytes = Params::numBytes / Params::height; // assumes tightly packed rows -- TODO confirm
    checkCudaExcept(cudaMemcpy2DToArrayAsync(
        curr_feed->mCudaTexArray,   // destination array
        0, 0,                       // x, y offsets (bytes, rows)
        curr_data,                  // source (device pointer)
        rowBytes,                   // source pitch
        rowBytes,                   // width of each row, in bytes
        Params::height,             // number of rows
        cudaMemcpyDeviceToDevice,   // transfer kind
        curr_feed->mStream          // the stream this data is part of
    ));
    // Unmapping also guarantees that work issued on mStream before this call
    // completes before subsequently issued graphics work begins, so the draw
    // below never samples a half-written frame.
    checkCudaExcept(cudaGraphicsUnmapResources(
        1, &curr_feed->mCudaTexGraphicsResource, curr_feed->mStream
    ));
#else
    // Non-CUDA path: upload from host memory. The driver converts from the
    // client format (GL_BGRA) to the internal format (GL_RGBA) during this
    // call, which is why this path renders correctly with BGRA input while a
    // raw device-to-device copy does not.
    glTexImage2D(
        GL_TEXTURE_2D, 0, Params::fmt::internal_format,
        Params::width, Params::height, 0, Params::fmt::format,
        Params::fmt::gl_type, curr_data
    );
#endif // USE_CUDA
}
// either the data is the same (no copy), or the data was just copied. now draw
mSharedFeedShader.drawIndexed(GL_TRIANGLES, 0, 2);
What am I missing here? Thanks for any insight!