EGL + CUDA interop: cudaGraphicsEGLRegisterImage returns 999

Hello, I am trying to use EGL + CUDA interop to build a headless program on my Ubuntu 22.04 server. The plan is to generate textures, bind them to an FBO, render into the FBO with OpenGL, and then let CUDA do some calculations on the textures. However, after I set up the FBO with its textures and create an EGLImageKHR, registering the texture with CUDA fails: the EGLImageKHR is created successfully, but the API cudaGraphicsEGLRegisterImage returns 999, which is cudaErrorUnknown. Can you help me? I searched online but found very few results.

Here is a minimal reproduction:


#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <dlfcn.h>

#include <iostream>
#include <fstream>
#include <cassert>
#include <string>
#include <vector>
#include <sstream>

#include <cuda_runtime.h>


// #include <EGL.h>
// #include <GL/glew.h>

// #include <EGL/egl.h>
// #include "glad/glad_egl.h"
#include <GLES3/gl3.h>
#include "glad/glad_egl.h"

#include <cuda_egl_interop.h>

using namespace std;


// Drains the OpenGL error queue of the current context, printing one line per
// pending error in the form "<NAME> | <file> (<line>)".
//
// file, line: source location to report (normally supplied by glCheckError()).
// Returns the first error that was pending, or GL_NO_ERROR when the queue was
// empty. (The previous version returned the loop variable, which is always
// GL_NO_ERROR once the loop has finished.)
GLenum glCheckError_(const char *file, int line)
{
    GLenum firstError = GL_NO_ERROR;
    GLenum errorCode;
    while ((errorCode = glGetError()) != GL_NO_ERROR)
    {
        // Remember the earliest error so the caller can act on it.
        if (firstError == GL_NO_ERROR)
            firstError = errorCode;

        std::string error;
        switch (errorCode)
        {
            case GL_INVALID_ENUM:                  error = "INVALID_ENUM"; break;
            case GL_INVALID_VALUE:                 error = "INVALID_VALUE"; break;
            case GL_INVALID_OPERATION:             error = "INVALID_OPERATION"; break;
            // case GL_STACK_OVERFLOW:                error = "STACK_OVERFLOW"; break;
            // case GL_STACK_UNDERFLOW:               error = "STACK_UNDERFLOW"; break;
            case GL_OUT_OF_MEMORY:                 error = "OUT_OF_MEMORY"; break;
            case GL_INVALID_FRAMEBUFFER_OPERATION: error = "INVALID_FRAMEBUFFER_OPERATION"; break;
            default:                               break;
        }

        if (error.empty())
        {
            // Codes without a case above are reported numerically instead of
            // as an empty name.
            std::cout << "UNKNOWN_ERROR(" << errorCode << ")";
        }
        else
        {
            std::cout << error;
        }
        std::cout << " | " << file << " (" << line << ")" << std::endl;
    }
    return firstError;
}
// Convenience wrapper that reports errors against the call site.
#define glCheckError() glCheckError_(__FILE__, __LINE__)

// ASSERT(cond, extra...): when `cond` is false, writes
//   "Assert failed: <cond>, <file>:<line>"
// followed by the stringified extra arguments on their own line to std::cout,
// then terminates the process with exit(-1). The do/while(false) wrapper makes
// the expansion behave as a single statement.
#define ASSERT(x, ...)                                              \
  do {                                                              \
    if (!(x)) {                                                     \
      std::cout << "Assert failed: " #x << ", " << __FILE__ << ":"  \
                << __LINE__ << std::endl                            \
                << #__VA_ARGS__ << std::endl;                       \
      exit(-1);                                                     \
    }                                                               \
  } while (false)

// EGL display and context; both are assigned in main() (eglGetDisplay /
// eglCreateContext).
EGLDisplay m_eglDpy;
EGLContext m_eglCtx;
// NOTE(review): the four handles below are not referenced anywhere in the
// visible code — presumably leftovers from a larger program; confirm before
// removing.
void* m_display;
void* m_surface;
void* m_context;
void* m_handle;

// Attribute list handed to eglChooseConfig in main(): pbuffer-capable surface,
// 8 bits each for R/G/B and depth, renderable with the OpenGL API.
static const EGLint configAttribs[] = {
          EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
          EGL_BLUE_SIZE, 8,
          EGL_GREEN_SIZE, 8,
          EGL_RED_SIZE, 8,
          EGL_DEPTH_SIZE, 8,
          EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
          EGL_NONE
  }; 

// Pbuffer dimensions and attribute list. In the visible code they are only
// used by the commented-out eglCreatePbufferSurface call in main().
static const int pbufferWidth = 9;
static const int pbufferHeight = 9;
static const EGLint pbufferAttribs[] = {
      EGL_WIDTH, pbufferWidth,
      EGL_HEIGHT, pbufferHeight,
      EGL_NONE,
};

// Framebuffer object, its depth renderbuffer, and the texture attached as
// GL_COLOR_ATTACHMENT0.
GLuint m_FBO, m_depthBufFBO, m_tex1;

// Texture attached as GL_COLOR_ATTACHMENT1; also the source of m_eglImg.
GLuint m_tex2;

// EGLImage created from m_tex2 in main().
EGLImageKHR m_eglImg;

// CUDA graphics resource filled in by cudaGraphicsEGLRegisterImage in main().
cudaGraphicsResource_t m_cuResTmp;


// Returns the string that cudaGetErrorName() yields for `error`; used by
// check() to build its diagnostic message.
static const char *_cudaGetErrorEnum(cudaError_t error) {
  const char *const enumName = cudaGetErrorName(error);
  return enumName;
}

// Terminates the process when a CUDA call reports a non-zero status.
//
// result: status value returned by the call (zero means success).
// func:   source text of the checked expression, used in the message.
// file, line: call site to report.
// On failure, prints "CUDA error at <file>:<line> code=<n>(<name>) "<func>""
// to stderr and calls exit(EXIT_FAILURE); otherwise returns normally.
template <typename T>
void check(T result, char const *const func, const char *const file,
           int const line) {
  if (result) {
    // The code is cast to int so the argument type matches the %d conversion
    // (the earlier unsigned cast mismatched the format specifier).
    fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
            static_cast<int>(result), _cudaGetErrorEnum(result), func);
    exit(EXIT_FAILURE);
  }
}

// Wraps a CUDA call so a failure is reported with the expression text and the
// call site.
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)

int main()
{
    gladLoadEGL();

    m_eglDpy = eglGetDisplay(EGL_DEFAULT_DISPLAY);

    EGLint major, minor;

    eglInitialize(m_eglDpy, &major, &minor);

    EGLint numConfigs;
    EGLConfig eglCfg;
    eglChooseConfig(m_eglDpy, configAttribs, &eglCfg, 1, &numConfigs);

    // EGLSurface eglSurf = eglCreatePbufferSurface(m_eglDpy, eglCfg, 
    //                                            pbufferAttribs);

    eglBindAPI(EGL_OPENGL_API);

    m_eglCtx = eglCreateContext(m_eglDpy, eglCfg, EGL_NO_CONTEXT, 
                                       NULL);

  
    eglMakeCurrent(m_eglDpy, EGL_NO_SURFACE, EGL_NO_SURFACE, m_eglCtx);
    // eglMakeCurrent(m_eglDpy, eglSurf, eglSurf, m_eglCtx);

    GLint pack_alignment;
    glGetIntegerv(GL_PACK_ALIGNMENT, &pack_alignment);
    GLint unpack_alignment;
    glGetIntegerv(GL_UNPACK_ALIGNMENT, &unpack_alignment);

    cout << "opengl pack info : unpack : " << unpack_alignment << " pack : " << pack_alignment << endl;

    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);

    GLint maxAttach = 0;
    glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, &maxAttach);

    GLint maxDrawBuf = 0;
    glGetIntegerv(GL_MAX_DRAW_BUFFERS, &maxDrawBuf);

    cout << "opengl maxAttach : << " << maxAttach << " maxDrawBuf << " << maxDrawBuf << endl;

    glGenFramebuffers(1, &m_FBO);
    glBindFramebuffer(GL_FRAMEBUFFER, m_FBO);


    glGenTextures(1, &m_tex1);
    glBindTexture(GL_TEXTURE_2D, m_tex1);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1920, 1080, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glCheckError();
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glBindTexture(GL_TEXTURE_2D, 0);
    glCheckError();
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_tex1, 0);
    glCheckError();


    glGenTextures(1, &m_tex2);
    glBindTexture(GL_TEXTURE_2D, m_tex2);
    // glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 1920, 1080, 0, GL_RGBA, GL_FLOAT, NULL);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1920, 1080, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, m_tex2, 0);
    glCheckError();

    glGenRenderbuffers(1, &m_depthBufFBO);
    glBindRenderbuffer(GL_RENDERBUFFER, m_depthBufFBO);
    glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32F, 1920, 1080);
    glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, m_depthBufFBO);

    if(glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
        std::cout << "ERROR::FRAMEBUFFER:: Framebuffer is not complete! 111: " << std::endl;
        std::cout << "err code :" << glCheckFramebufferStatus(GL_FRAMEBUFFER) << endl;
    }

    GLuint attachments[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1};
    glDrawBuffers(2, attachments);
    
    glCheckError();
    glBindFramebuffer(GL_FRAMEBUFFER, 0);

    EGLint eglImgAttrs[] = {EGL_IMAGE_PRESERVED_KHR, EGL_FALSE, EGL_NONE,
                          EGL_NONE};

    m_eglImg = eglCreateImageKHR(m_eglDpy, m_eglCtx, EGL_GL_TEXTURE_2D_KHR,
        (EGLClientBuffer)m_tex2, eglImgAttrs);
    if (m_eglImg == EGL_NO_IMAGE_KHR) {
        printf("EGLImage create failed for read texture with error %d\n",
            eglGetError());
    } else {
        printf("EGLImage1 created \n");
    }

    checkCudaErrors(cudaGraphicsEGLRegisterImage(&m_cuResTmp, m_eglImg, cudaGraphicsRegisterFlagsNone));

    return 0;

}