I wrote a test program whose only job is to create an EGL context on a headless server. When I run multiple instances of this program simultaneously on a headless server (Ubuntu 16.04 / Ubuntu 20.04) with multiple GPUs (4 x Tesla T4, driver version 460.56), some processes occasionally fail, with eglGetDisplay(EGL_DEFAULT_DISPLAY) returning EGL_NO_DISPLAY (no valid display available).
I also tested this program on a single-GPU server (Tesla P4, driver version 440.64.00), and the error never occurs there. The problem seems to be related to having multiple GPUs. Below are the script that starts the processes and a code snippet. Any suggestions are appreciated!
# Start 20 copies of the test binary in the background so they run concurrently.
for instance in $(seq 1 20); do
    ./test-egl-context &
done
// Sets up an off-screen (pbuffer) EGL rendering context in five steps:
// display -> config -> pbuffer surface -> API binding -> context.
// Every step is checked; on failure the error is reported and false is
// returned, mirroring the existing EGL_NO_DISPLAY handling.
// NOTE(review): the early returns do not release objects created by earlier
// steps (display initialization, surface) -- presumably the owner's teardown
// handles that; confirm against the rest of the class.

// 1. Initialize EGL
// NOTE(review): EGL_DEFAULT_DISPLAY leaves the device choice to the driver.
// On multi-GPU headless machines, explicit device selection through
// EGL_EXT_device_enumeration / EGL_EXT_platform_device may be worth trying
// -- not verified here.
mDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY);
if (EGL_NO_DISPLAY == mDisplay)
{
    std::cerr << "no EGL display found: " << eglGetError() << std::endl;
    return false;
}
// Zero-initialized so the values are well defined even if eglInitialize
// fails without writing them.
EGLint major = 0;
EGLint minor = 0;
int ret0 = eglInitialize(mDisplay, &major, &minor);
dumpEglError("initialize");
if (EGL_TRUE != ret0)
{
    // Every later call needs an initialized display, so stop here.
    std::cout << "initialize failed: " << ret0 << std::endl;
    return false;
}
std::cout << "egl major: " << major << ", minor: " << minor << std::endl;
// 2. Select an appropriate configuration
EGLint numConfigs = 0;
EGLConfig eglCfg = NULL;
// Request a pbuffer-capable, desktop-OpenGL-renderable config with at least
// 8 bits per color channel and an 8-bit depth buffer.
const EGLint configAttribs[] = {
    EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
    EGL_BLUE_SIZE, 8,
    EGL_GREEN_SIZE, 8,
    EGL_RED_SIZE, 8,
    EGL_DEPTH_SIZE, 8,
    EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
    EGL_NONE
};
const int pbufferWidth = 9;
const int pbufferHeight = 9;
const EGLint pbufferAttribs[] = {
    EGL_WIDTH, pbufferWidth,
    EGL_HEIGHT, pbufferHeight,
    EGL_NONE,
};
ret0 = eglChooseConfig(mDisplay, configAttribs, &eglCfg, 1, &numConfigs);
if (EGL_TRUE != ret0)
{
    std::cout << "chooseconfig failed: " << ret0 << std::endl;
}
dumpEglError("chooseConfig");
// eglChooseConfig can succeed while matching zero configs; in that case
// eglCfg was never written and must not be used.
if (EGL_TRUE != ret0 || numConfigs < 1)
{
    std::cerr << "no matching EGL config, numConfigs: " << numConfigs << std::endl;
    return false;
}
// 3. Create a surface
mSurface = eglCreatePbufferSurface(mDisplay, eglCfg, pbufferAttribs);
dumpEglError("eglCreatePbufferSurface");
if (EGL_NO_SURFACE == mSurface)
{
    std::cerr << "eglCreatePbufferSurface failed" << std::endl;
    return false;
}
// 4. Bind the API
// If binding fails, the context below would be created for whatever API is
// currently bound instead of desktop OpenGL.
if (EGL_TRUE != eglBindAPI(EGL_OPENGL_API))
{
    std::cerr << "eglBindAPI failed: " << eglGetError() << std::endl;
    return false;
}
// 5. Create a context and make it current
mContext = eglCreateContext(mDisplay, eglCfg, EGL_NO_CONTEXT, NULL);
if (EGL_NO_CONTEXT == mContext)
{
    std::cerr << "eglCreateContext failed: " << eglGetError() << std::endl;
    return false;
}