[EGL_IMG_context_priority] No Performance Difference Between High and Low Priority Contexts on Nvidia 545 Driver

I want to benchmark the EGL_IMG_context_priority extension on the Nvidia Linux driver (Tesla T4, Driver Version: 545.29.02). I wrote a program that creates threads with high-priority contexts, rendering at 30 FPS. I also created another program with threads running low-priority contexts, also drawing at 30 FPS.
During benchmarking, I noticed that both programs run at the same FPS, and context priority doesn’t seem to affect performance.
Am I missing something in the implementation or testing process? Any insights would be appreciated. The sample code is attached.
opengl_priority_benchmarking.zip (4.9 KB)

/*
sudo apt update
sudo apt install g++
sudo apt install libegl1-mesa-dev
g++ test.cpp -lEGL -lGL -o test
 */

#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES3/gl32.h>

#include <unistd.h>

#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

// Priority-level tokens of the EGL_IMG_context_priority extension. main() maps its
// <priority> argument (0/1/2) onto these, and DrawThread passes the result as the
// value of EGL_CONTEXT_PRIORITY_LEVEL_IMG when it creates its context.
#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101
#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102
#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103

// Per-thread GL state: each worker thread gets its own copy (thread_local).
// programObject is assigned in Init() and bound in Draw().
thread_local GLuint programObject = 0;
// Location of the "tex" sampler uniform, looked up in Init() and set in Draw().
thread_local GLint texture_location = -1;

// Process-wide EGL handles, filled in by initOpengl() and read by every worker thread.
EGLDisplay display_;
EGLConfig config;

// Per-thread pbuffer surface, created in DrawThread() and used by Draw().
thread_local EGLSurface surface_;

// Dimensions shared by the pbuffer surface (DrawThread) and the uploaded texture (Draw).
const int width = 900;
const int height = 1600;

// DrawThread keeps rendering for as long as this flag is true.
std::atomic<bool> running(true);
// Most recent FPS measurement of each worker, indexed by the threadId given to DrawThread.
// NOTE(review): the table has a fixed 500 slots; nothing in this block ties that
// size to the number of threads actually started.
std::vector<std::atomic<double>> averageFps(500);
// Held by DrawThread while storing into averageFps and by calculateAverageFps while summing it.
std::mutex fpsMutex;

/**
 * Logs the GL_VENDOR string of the GL context that is current on the calling thread.
 *
 * @param threadId Index the caller uses to identify the thread; it is echoed in
 *                 the log line next to std::this_thread::get_id().
 */
void printOpenGLInfo(int threadId)
{
    const GLubyte* vendor = glGetString(GL_VENDOR);

    // glGetString can hand back a null pointer when the query fails. Streaming a
    // null character pointer into std::cout is undefined behaviour, so a
    // placeholder is printed instead.
    const char* vendorText = vendor ? reinterpret_cast<const char*>(vendor) : "(unavailable)";

    std::cout << "Vendor: " << vendorText
              << ", for thread: " << threadId
              << " (Thread ID: " << std::this_thread::get_id() << ")"
              << std::endl;
}

/**
 * Creates and compiles one shader object in the current GL context.
 *
 * @param type      Shader stage, forwarded to glCreateShader.
 * @param shaderSrc NUL-terminated GLSL source text.
 * @return The shader object name, or 0 if the source pointer is null, the
 *         object could not be created, or compilation failed. On a compile
 *         failure the info log (when there is one) is printed to stdout and
 *         the shader object is deleted.
 */
GLuint LoadShader(GLenum type, const GLbyte *shaderSrc)
{
    if (shaderSrc == nullptr)
        return 0;

    const GLuint shader = glCreateShader(type);
    if (shader == 0)
        return 0;

    // glShaderSource wants `const GLchar*`; reinterpret the bytes without
    // casting away constness.
    const GLchar* source = reinterpret_cast<const GLchar*>(shaderSrc);
    glShaderSource(shader, 1, &source, nullptr);
    glCompileShader(shader);

    GLint compiled = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    if (compiled)
        return shader;

    GLint infoLen = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen);
    if (infoLen > 1)
    {
        // Zero-initialised buffer owned by the vector: nothing to free and the
        // text is always NUL-terminated for printf.
        std::vector<GLchar> infoLog(infoLen);
        glGetShaderInfoLog(shader, infoLen, nullptr, infoLog.data());
        printf("Error compiling shader:\n%s\n", infoLog.data());
    }
    glDeleteShader(shader);
    return 0;
}

// Alternative scene: a single green triangle without any texture.
// The whole variant is compiled out by `#if 0`. Its Init() has the same signature
// as the textured variant below, so only one of the two can be enabled at a time.
// NOTE(review): this Draw() takes no parameter, whereas DrawThread calls
// Draw(threadId); that call would have to be adjusted before enabling this variant.
#if 0
// Builds the triangle shader program and vertex buffer in the current GL context.
// Returns a non-zero value on success and 0 on failure (the returns below mix
// `0`, `false` and `true`, all converted to int).
int Init(int threadId)
{
    printOpenGLInfo(threadId);

    // Vertex shader: passes the incoming position through unchanged.
    GLbyte vShaderStr[] =
        "#version 320 es\n"
        "layout(location = 0) in vec4 vPosition;\n"
        "void main() {\n"
        "  gl_Position = vPosition;\n"
        "}\n";

    // Fragment shader: writes a constant colour.
    GLbyte fShaderStr[] =
        "#version 320 es\n"
        "precision mediump float;\n"
        "out vec4 FragColor;\n"
        "void main() {\n"
        "  FragColor = vec4(0.0, 1.0, 0.0, 1.0);\n" // Green color
        "}\n";

    GLuint vertexShader;
    GLuint fragmentShader;
    GLint linked;

    // NOTE(review): the LoadShader results are not checked for 0 before being attached.
    vertexShader = LoadShader(GL_VERTEX_SHADER, vShaderStr);
    fragmentShader = LoadShader(GL_FRAGMENT_SHADER, fShaderStr);

    programObject = glCreateProgram();
    if (programObject == 0) {
        return 0;
    }

    glAttachShader(programObject, vertexShader);
    glAttachShader(programObject, fragmentShader);
    glBindAttribLocation(programObject, 0, "vPosition");
    glLinkProgram(programObject);

    // On a link failure, print the program info log (if any) and give up.
    glGetProgramiv(programObject, GL_LINK_STATUS, &linked);
    if (!linked)
    {
        GLint infoLen = 0;
        glGetProgramiv(programObject, GL_INFO_LOG_LENGTH, &infoLen);
        if(infoLen > 1)
        {
            char* infoLog = (char*)malloc(sizeof(char) * infoLen);
            glGetProgramInfoLog(programObject, infoLen, NULL, infoLog);
            printf("Error linking program:\n%s\n", infoLog);
            free(infoLog);
        }
        glDeleteProgram(programObject);
        return false;
    }

    // The shaders of this variant declare no "tex" uniform, so this lookup
    // presumably yields -1; the result is not used by this variant's Draw().
    texture_location = glGetUniformLocation(programObject, "tex");

    // `ibo` is declared but never used in this variant.
    GLuint vbo, ibo;

    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);

    // Three vertices with three floats each.
    GLfloat vertexData[] = {
        0.0f,  0.5f, 0.0f, // Vertex 1 (X, Y, Z)
        -0.5f, -0.5f, 0.0f, // Vertex 2 (X, Y, Z)
        0.5f, -0.5f, 0.0f  // Vertex 3 (X, Y, Z)
    };

    glBufferData(GL_ARRAY_BUFFER, sizeof(vertexData), vertexData, GL_STATIC_DRAW);

    // Attribute 0: three floats per vertex, tightly packed, starting at offset 0.
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(GLfloat), (char*)0 + 0 * sizeof(GLfloat));

    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);

    return true;
}

// Clears the colour buffer, draws the triangle, swaps the surface, and prints
// the pending GL error code if there is one.
void Draw()
{
    glClear(GL_COLOR_BUFFER_BIT);

    glUseProgram(programObject);

    glDrawArrays(GL_TRIANGLES, 0, 3);

    eglSwapBuffers(display_, surface_);

    // check for errors
    GLenum error = glGetError();
    if(error != GL_NO_ERROR) {
        std::cout << error << std::endl;
    }
}
#endif

// Draw a fullscreen quad with texture
int Init(int threadId)
{
    // Print OpenGL information
    printOpenGLInfo(threadId);

    // Vertex Shader source code
    GLbyte vShaderStr[] =
        "#version 320 es\n"
        "layout(location = 0) in vec4 vposition;\n"
        "layout(location = 1) in vec2 vtexcoord;\n"
        "out vec2 ftexcoord;\n"
        "void main() {\n"
        "   ftexcoord = vtexcoord;\n"
        "   gl_Position = vposition;\n"
        "}\n";

    // Fragment Shader source code
    GLbyte fShaderStr[] =
        "#version 320 es\n"
        "precision highp float;\n"
        "uniform sampler2D tex;\n"
        "in vec2 ftexcoord;\n"
        "layout(location = 0) out vec4 FragColor;\n"
        "void main() {\n"
        "   FragColor = texture(tex, ftexcoord);\n"
        "}\n";

    GLuint vertexShader, fragmentShader;
    GLint linked;

    // Load and compile vertex and fragment shaders
    vertexShader = LoadShader(GL_VERTEX_SHADER, vShaderStr);
    fragmentShader = LoadShader(GL_FRAGMENT_SHADER, fShaderStr);

    // Create and link the shader program
    programObject = glCreateProgram();
    if (programObject == 0)
        return 0;

    glAttachShader(programObject, vertexShader);
    glAttachShader(programObject, fragmentShader);

    // Bind the attributes
    glBindAttribLocation(programObject, 0, "vPosition");
    glBindAttribLocation(programObject, 1, "vTexcoord");

    glLinkProgram(programObject);

    // Check program link status
    glGetProgramiv(programObject, GL_LINK_STATUS, &linked);
    if (!linked)
    {
        GLint infoLen = 0;
        glGetProgramiv(programObject, GL_INFO_LOG_LENGTH, &infoLen);

        if (infoLen > 1)
        {
            char *infoLog = (char *)malloc(sizeof(char) * infoLen);
            glGetProgramInfoLog(programObject, infoLen, NULL, infoLog);
            printf("Error linking program:\n%s\n", infoLog);
            free(infoLog);
        }
        glDeleteProgram(programObject);
        return false;
    }

    // Get the location of the texture uniform
    texture_location = glGetUniformLocation(programObject, "tex");

    GLuint vbo, ibo;
    // Generate and bind the vertex buffer object (VBO)
    glGenBuffers(1, &vbo);
    glBindBuffer(GL_ARRAY_BUFFER, vbo);

    // Data for a fullscreen quad with texture coordinates
    GLfloat vertexData[] = {
        // X, Y, Z, U, V
        1.0f, 1.0f, 0.0f, 1.0f, 1.0f, // vertex 0
        -1.0f, 1.0f, 0.0f, 0.0f, 1.0f, // vertex 1
        1.0f, -1.0f, 0.0f, 1.0f, 0.0f, // vertex 2
        -1.0f, -1.0f, 0.0f, 0.0f, 0.0f // vertex 3
    };

    // Fill the VBO with data
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertexData), vertexData, GL_STATIC_DRAW);

    // Set up vertex attribute pointers
    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (char *)0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (char *)0 + 3 * sizeof(GLfloat));

    // Generate and bind the index buffer object (IBO)
    glGenBuffers(1, &ibo);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);

    GLuint indexData[] = {
        0, 1, 2, 2, 1, 3 // two triangles
    };

    // Fill the IBO with data
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indexData), indexData, GL_STATIC_DRAW);

    // Clear color
    glClearColor(0.0f, 0.0f, 0.0f, 1.0f);

    return true;
}

void Draw(int threadId)
{
    GLuint texture;
    // Generate and bind a new texture
    glGenTextures(1, &texture);
    glBindTexture(GL_TEXTURE_2D, texture);

    // Create some image data (simple pattern)
    #if 0
    std::vector<GLubyte> image(4 * width * height);
    for (int j = 0; j < height; ++j) {
        for (int i = 0; i < width; ++i) {
            size_t index = j * width + i;
            image[4 * index + 0] = 0xFF * (j / 10 % 2) * (i / 10 % 2); // R
            image[4 * index + 1] = 0xFF * (j / 13 % 2) * (i / 13 % 2); // G
            image[4 * index + 2] = 0xFF * (j / 17 % 2) * (i / 17 % 2); // B
            image[4 * index + 3] = 0xFF;                               // A
        }
    }
    #else
    std::vector<GLubyte> image(4 * width * height, 0xFF);
    #endif

    // Set texture parameters
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    // Set the texture content
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, image.data());

    // Clear the color buffer
    glClear(GL_COLOR_BUFFER_BIT);

    // Use the shader program
    glUseProgram(programObject);

    // Bind texture to texture unit 0
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, texture);

    // Set texture uniform (the texture is bound to texture unit 0)
    glUniform1i(texture_location, 0);

    // Draw the quad
    glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);

    // Unbind the texture
    glBindTexture(GL_TEXTURE_2D, 0);

    // Swap buffers
    eglSwapBuffers(display_, surface_);

    // Clean up by deleting the texture (optional)
    glDeleteTextures(1, &texture);
}

void DrawThread(int threadId, EGLint priority, int fps)
{
    static const EGLint pbufAttribs[] = {
        EGL_WIDTH, width,
        EGL_HEIGHT, height,
        EGL_NONE
    };

    surface_ = eglCreatePbufferSurface(display_, config, pbufAttribs);
    if (surface_ == EGL_NO_SURFACE) {
        std::cout << "eglCreatePbufferSurface failed" << std::endl;
        return;
    }

    // Set context priority
    static const EGLint contextAttribs[] = {
        EGL_CONTEXT_CLIENT_VERSION, 3,
        EGL_CONTEXT_PRIORITY_LEVEL_IMG, priority,
        EGL_NONE
    };

    EGLContext context_;
    context_ = eglCreateContext(display_, config, EGL_NO_CONTEXT, contextAttribs);
    if (context_ == EGL_NO_CONTEXT) {
        std::cout << "eglCreateContext failed for thread " << threadId << std::endl;
        return;
    }

#if 1
    // Check the priority of the context
    EGLint contextPriority;
    eglQueryContext(display_, context_, EGL_CONTEXT_PRIORITY_LEVEL_IMG, &contextPriority);
    std::cout << "Priority level : 0x" << std::hex << contextPriority << ", for thread " << threadId << std::endl;
#endif

    if (!eglMakeCurrent(display_, surface_, surface_, context_)) {
        EGLint error = eglGetError();
        std::cout << "eglMakeCurrent failed for thread " << threadId << std::endl;
        std::cout << "EGL Error: 0x" << std::hex << error << std::endl;
        return;
    }

    if (!Init(threadId)) {
        std::cout << "Initialization failed for thread " << threadId << std::endl;
        return;
    }

    auto startTime = std::chrono::high_resolution_clock::now();
    int localFrameCount = 0;

    while (running) {
        // Measure time before Draw()
        auto drawStart = std::chrono::high_resolution_clock::now();

        Draw(threadId);
        localFrameCount++;

        // Measure time after Draw()
        auto drawEnd = std::chrono::high_resolution_clock::now();

        // Calculate time taken by Draw() in milliseconds
        std::chrono::duration<double, std::milli> drawTime = drawEnd - drawStart;
        double drawTimeMs = drawTime.count();
        //std::cout << "Draw Time: " << drawTimeMs <<" for thread " << threadId << std::endl;

        // Calculate remaining time to sleep
        double targetFrameTime = 1000.0 / fps;
        double sleepTime = targetFrameTime - drawTimeMs;

        if (sleepTime > 0) {
            std::this_thread::sleep_for(std::chrono::duration<double, std::milli>(sleepTime));
        } 

        // Calculate FPS every second
        auto currentTime = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> elapsed = currentTime - startTime;
        if (elapsed.count() >= 1.0) {
            double threadFps = localFrameCount / elapsed.count();
            {
                std::lock_guard<std::mutex> lock(fpsMutex);
                averageFps[threadId] = threadFps;
            }
            //std::cout << "\nThread " << threadId << " - Average FPS: " << threadFps << std::endl;
            localFrameCount = 0;
            startTime = currentTime;
        }
    }

    eglMakeCurrent(display_, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
    if (context_ != EGL_NO_CONTEXT)
        eglDestroyContext(display_, context_);
}

void calculateAverageFps(int threadCount) {
    std::cout << "calculateAverageFps called" << std::endl;
    while (true) {
        std::this_thread::sleep_for(std::chrono::seconds(1));
        
        // Calculate average FPS from all threads
        double totalFps = 0.0;
        //std::stringstream fpsString;
        {
            std::lock_guard<std::mutex> lock(fpsMutex);
            for (int i = 0; i < threadCount; ++i) {
                totalFps += averageFps[i];

                //fpsString << averageFps[i];
                //if (i < threadCount - 1) {
                //    fpsString << ", ";
                //}
            }
        }

        // Print the comma-separated string of FPS values
        //std::cout << "\nFPS values: " << fpsString.str() << std::endl;
        double avgFps = totalFps / threadCount;
        std::cout << "Average FPS across " << threadCount << " threads: " << avgFps << std::endl << std::endl;
    }
}

void initOpengl()
{
    PFNEGLQUERYDEVICESEXTPROC eglQueryDevicesEXT = (PFNEGLQUERYDEVICESEXTPROC)eglGetProcAddress("eglQueryDevicesEXT");
    PFNEGLQUERYDEVICEATTRIBEXTPROC eglQueryDeviceAttribEXT = (PFNEGLQUERYDEVICEATTRIBEXTPROC)eglGetProcAddress("eglQueryDeviceAttribEXT");
    PFNEGLQUERYDEVICESTRINGEXTPROC eglQueryDeviceStringEXT = (PFNEGLQUERYDEVICESTRINGEXTPROC)eglGetProcAddress("eglQueryDeviceStringEXT");
    PFNEGLGETPLATFORMDISPLAYEXTPROC eglGetPlatformDisplayEXT = (PFNEGLGETPLATFORMDISPLAYEXTPROC)eglGetProcAddress("eglGetPlatformDisplayEXT");

    if (!eglQueryDevicesEXT || !eglQueryDeviceAttribEXT || !eglQueryDeviceStringEXT || !eglGetPlatformDisplayEXT) {
        std::cout << "Failed to load required EGL extensions." << std::endl;
        return;
    }

    EGLint numDevices;
    eglQueryDevicesEXT(0, nullptr, &numDevices);
    std::vector<EGLDeviceEXT> devices(numDevices);
    eglQueryDevicesEXT(numDevices, devices.data(), &numDevices);

#if 0
    std::cout << "\n\n";
    std::cout << "Number of EGL Devices: " << numDevices << std::endl;
    for (int i = 0; i < numDevices; ++i) {
        const char* vendor = eglQueryDeviceStringEXT(devices[i], EGL_VENDOR);
        const char* version = eglQueryDeviceStringEXT(devices[i], EGL_VERSION);
        std::cout << "Device " << i << ":" << std::endl;
        if (vendor) {
            std::cout << "  Vendor: " << vendor << std::endl;
        }
        if (version) {
            std::cout << "  Version: " << version << std::endl;
        }
    }
    std::cout << "\n\n";
#endif

    display_ = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, devices[0], nullptr);
    if(display_ == EGL_NO_DISPLAY) {
        std::cout << "EGL Display open failed\n";
        return;
    }

#if 0
    const char* extensions = eglQueryString(display_, EGL_EXTENSIONS);
    std::cout << "EGL_EXTENSIONS: " << extensions << "\n";
    if (extensions && strstr(extensions, "EGL_IMG_context_priority")) {
        std::cout << "EGL_IMG_context_priority extension - SUPPORTED\n";
    } else {
        std::cout << "EGL_IMG_context_priority extension - NOT SUPPORTED\n";
    }
#endif

    if (!eglInitialize(display_, nullptr, nullptr)) {
        std::cout << "EGL Initialize failed\n";
        return;
    }

        static const EGLint configAttribs[] = {
        EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
        EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT,
        EGL_NONE
    };

    int n;
    if (!eglChooseConfig(display_, configAttribs, &config, 1, &n) || n == 0) {
        std::cout << "eglChooseConfig failed" << std::endl;
        return;
    }

}

int main(int argc, char** argv)
{
    if (argc != 4) {
        std::cout << "Usage: " << argv[0] << " <threadCount> <priority> <fps>" << std::endl;
        return -1;
    }

    int threadCount = std::stoi(argv[1]);
    EGLint priority = std::stoi(argv[2]);
    if (priority == 0)
        priority = EGL_CONTEXT_PRIORITY_HIGH_IMG;
    else if (priority == 1)
        priority = EGL_CONTEXT_PRIORITY_MEDIUM_IMG;
    else if (priority == 2)
        priority = EGL_CONTEXT_PRIORITY_LOW_IMG;
    else {
        std::cerr << "Invalid priority. Use '0 for high', '1 for medium', or '2 for low'." << std::endl;
        return -1;
    }
    int fps = std::stoi(argv[3]);

    // Initialize EGL
    initOpengl();

    std::vector<std::thread> threads;
    for (int i = 0; i < threadCount; ++i) {
        threads.emplace_back(DrawThread, i, priority, fps);
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    std::thread avgFpsThread(calculateAverageFps, threadCount);

    for (auto& thread : threads) {
        if (thread.joinable()) {
            thread.join();
        }
    }

    avgFpsThread.join();

    //std::this_thread::sleep_for(std::chrono::seconds(6));
    //running = false;

    std::cout << "Main thread exiting." << std::endl;

    return 0;
}