CUDA Output to OpenGL Texture

I am grabbing frames from a camera and attempting to show them in an OpenGL texture (this happens to be in a Qt app but I’m not sure that is relevant to my issue).

The texture displayed in my OpenGL context appears distorted, and the image is repeated ("stamped") 4 times.

I think this is because of some mismatch on this line:

glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, VALID_IMAGE_WIDTH, VALID_IMAGE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

I’ve tried various options for the internalformat parameter (GL_RGBA8, GL_RGBA8UI_EXT, GL_RGBA8UI and others).

Any suggestions about what might be happening here would be much appreciated. The relevant code follows:

// Vertex shader for the textured quad: places the 2D position attribute
// (location 0) at z = 0, w = 1 and passes the texture coordinate
// attribute (location 1) through to the fragment stage unchanged.
static const char *glsl_drawtex_vertshader_src = R"glsl(#version 450

in layout(location = 0) vec2 posAttr;
in layout(location = 1) vec2 aTexCoord;
out vec2 vTexCoord;
void main() {
   gl_Position = vec4(posAttr, 0.0, 1.0);
   vTexCoord = vec2(aTexCoord.x, aTexCoord.y);
}
)glsl";

	// Fragment shader for the textured quad: samples texImage at the
	// interpolated texture coordinate and writes the texel out unchanged.
	// The shader declares #version 450, so it uses the overloaded texture()
	// built-in; texture2D() is only available in the compatibility profile.
	static const char *glsl_drawtex_fragshader_src = "#version 450\n"
        "\n"
        "uniform sampler2D texImage;\n"
        "out vec4 fragColor;\n"
        "in vec2 vTexCoord;\n"
        "void main() {\n"
        "   fragColor = texture(texImage, vTexCoord);\n"
        "}\n";

        // OpenGL texture name; the CUDA result is copied into this texture
        // and it is then drawn as a textured quad.
        GLuint tex_cudaResult;  // where we will copy the CUDA result
        // CUDA graphics-interop handle obtained by registering tex_cudaResult
        // with cudaGraphicsGLRegisterImage; mapped/unmapped around each copy.
        struct cudaGraphicsResource *cuda_tex_result_resource;
        // Shader program used to draw the textured quad.
        QOpenGLShaderProgram *m_programTex;
        // Byte buffer allocated with cudaMallocManaged; receives the debayer output.
        uint8_t *cuda_dest_resource;
        // One raw frame pointer per camera; entry 0 is passed to the debayer
        // as 16-bit samples.
        void	*m_src[NUM_CAMERAS]; // void pointer to a frame coming from one of the cameras

	// Set up and clear the GL viewport, then ask the CUDA runtime for the
	// device that best matches compute capability 8.0.
	cudaDeviceProp prop;
	int dev = 0;  // stays 0 if cudaChooseDevice fails

	glViewport(0, 0, 800, 480);

	glClear(GL_COLOR_BUFFER_BIT);
	printGLError("glClear(GL_COLOR_BUFFER_BIT");

	// Only major/minor are requested; every other property is zeroed.
	memset(&prop, 0, sizeof(cudaDeviceProp));
	prop.major = 8;
	prop.minor = 0;

	// Best effort: a failure is reported but does not stop execution.
	// NOTE(review): dev is not used again in the visible code; confirm that
	// the chosen device is made current elsewhere.
	if (cudaChooseDevice(&dev, &prop) != cudaSuccess) {
		puts("failed to choose device");
	}

    // Create the RGBA8 texture that receives the CUDA result and register it
    // with CUDA graphics interop.
    // tex_cudaResult is a GLuint: glGenTextures needs its address, and every
    // other call takes the texture name by value.
    glGenTextures(1, &tex_cudaResult);

    glBindTexture(GL_TEXTURE_2D, tex_cudaResult);

    // Clamp at the edges and sample without filtering.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Allocate storage only (null data pointer); CUDA fills the texels later.
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, VALID_IMAGE_WIDTH, VALID_IMAGE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);

    checkCudaErrors(cudaGraphicsGLRegisterImage(&cuda_tex_result_resource, tex_cudaResult, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard));

    // add shaders
    m_programTex = new QOpenGLShaderProgram;
    m_programTex->addShaderFromSourceCode(QOpenGLShader::Vertex, glsl_drawtex_vertshader_src);
    m_programTex->addShaderFromSourceCode(QOpenGLShader::Fragment, glsl_drawtex_fragshader_src);
    m_programTex->link();
    m_programTex->bind();

    m_programTex->release();

    // init cuda buffers
    // set up vertex data parameter
    unsigned int num_texels = VALID_IMAGE_WIDTH * VALID_IMAGE_HEIGHT;
    unsigned int num_values = num_texels * VALID_BYTES_PER_PIXEL;
    unsigned int size_tex_data = sizeof(GLubyte) * num_values;
    checkCudaErrors(cudaMallocManaged((void **)&cuda_dest_resource, size_tex_data, cudaMemAttachGlobal));

    // generate cuda frame
	uint8_t *out_data;

    out_data = cuda_dest_resource;

    // CUDA debayer on frame
    gpuDebayer_optimized(reinterpret_cast<uint16_t*>(m_src[0]), reinterpret_cast<uint8_t*>(out_data), VALID_IMAGE_WIDTH, VALID_IMAGE_HEIGHT);

    // We want to copy cuda_dest_resource data to the texture
    // map buffer objects to get CUDA device pointers
    cudaArray *texture_ptr;
    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0));
    checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0));

    checkCudaErrors(cudaMemcpyToArray(texture_ptr, 0, 0, out_data, size_tex_data, cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0));

    // Draw the texture to the screen as a quad made of a 4-vertex triangle strip.
    glBindTexture(GL_TEXTURE_2D, tex_cudaResult);

    glEnable(GL_TEXTURE_2D);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_LIGHTING);

    glViewport(0, 0, PREVIEW_GL_WIDTH, PREVIEW_GL_HEIGHT);

    m_programTex->bind();
    GLint id = m_programTex->uniformLocation("texImage");
    glUniform1i(id, 0); // sampler "texImage" reads from texture unit 0

    // Interleaved vertex data: 2 position floats, then 2 texture-coordinate
    // floats per vertex (stride of 4 floats).
    static const GLfloat vertices[] = {
        // positions         // texture coords
         -1.0f, -1.0f,      0.0f, 0.0f,
         -1.0f, 1.0f,       0.0f, 1.0f,
         1.0f, -1.0f,       1.0f, 0.0f,
         1.0f, 1.0f,        1.0f, 1.0f,
    };

    glGenBuffers(1, &m_vbo);
    glBindBuffer(GL_ARRAY_BUFFER, m_vbo);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    // Attribute 0 -> posAttr (offset 0); attribute 1 -> aTexCoord (offset 2 floats).
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), 0);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (void*)(2 * sizeof(float)));

    // The attribute pointers were set while m_vbo was bound, so the array
    // buffer binding can be cleared before drawing.
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glEnableVertexAttribArray(0);
    glEnableVertexAttribArray(1);

    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // Restore the state touched above.
    glDisableVertexAttribArray(1);

    glDisableVertexAttribArray(0);

    glDisable(GL_TEXTURE_2D);

    m_programTex->release();