Hello.
I’ve spent a lot of time trying to avoid this issue and make my code produce correct results, but the GLSL compiler breaks everything. Here is a test application to reproduce the bug. I’ve removed 99% of the code from it to make it cleaner.
Bug depends on the code itself.
I can’t safely assign a variable inside a loop.
Result should be:
2101479989,1917184202,4191109983,1779160146
But it is:
2101479989,0,4191109983,0
In practice the shaders are huge, and array indices can be selected randomly, based on previous values, so the loops can’t be unrolled manually; everything becomes very slow and I reach the instruction limit. Each value can depend on the previous one, etc.
main.cpp
//-----------------------------------------------------------------------------
// Dependencies: glfw, glew
// Output must be:
// OUTPUT: 2101479989,1917184202,4191109983,1779160146
//-----------------------------------------------------------------------------
#include <GL/glew.h>
#include <GLFW/glfw3.h> // window, opengl context creation
#include <stdint.h> // uint32_t, uint16_t
#include <stdio.h> // FILE, fopen, fseek, ftell, fread, fclose
#include <stdlib.h> // exit
#include <string.h>
#include <iostream>
#include <string>
#include <vector>
//-----------------------------------------------------------------------------
// GLSL functions
//-----------------------------------------------------------------------------
// Checks the compile status of `shader`. Returns silently on success;
// otherwise prints the compiler's info log to stderr and exits with status 1.
void printShaderInfoLog(GLuint& shader)
{
    GLint compileStatus;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compileStatus);
    if (compileStatus == GL_TRUE)
        return;

    const GLsizei logCapacity = 10000;
    GLchar infoLog[logCapacity];
    std::cerr << "Failed to compile shader!" << std::endl;
    glGetShaderInfoLog(shader, logCapacity, NULL, infoLog);
    std::cerr << infoLog << std::endl;
    exit(1);
}
//-----------------------------------------------------------------------------
// Checks the link status of `program`. Returns silently on success;
// otherwise prints the linker's info log to stderr, deletes the program
// object and exits with status 1.
void printProgramInfoLog(GLuint& program)
{
    GLint linkStatus;
    glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
    if (linkStatus == GL_TRUE)
        return;

    const GLsizei logCapacity = 10000;
    GLchar infoLog[logCapacity];
    std::cerr << "Failed to link program!" << std::endl;
    glGetProgramInfoLog(program, logCapacity, NULL, infoLog);
    std::cerr << infoLog << std::endl;
    glDeleteProgram(program);
    exit(1);
}
//-----------------------------------------------------------------------------
// Reads the whole file `file_name` as GLSL source, compiles it as a shader of
// type `shader_type` and returns the new shader object.
//
// Any failure (file cannot be opened, sized or fully read, or the shader does
// not compile) prints a message and terminates the process with exit(1), so
// the returned shader is always a successfully compiled one.
GLuint createShaderFromFile(const char* file_name, GLenum shader_type)
{
    FILE* file = fopen(file_name, "rb");
    if (!file)
    {
        std::cout << "Failed to load shader: " << file_name << std::endl;
        exit(1);
    }

    // Determine the file size; ftell returns long, and every step can fail.
    long length = -1;
    if (fseek(file, 0, SEEK_END) == 0)
        length = ftell(file);
    if (length < 0 || fseek(file, 0, SEEK_SET) != 0)
    {
        std::cout << "Failed to read shader file: " << file_name << std::endl;
        fclose(file);
        exit(1);
    }

    std::string source(static_cast<size_t>(length), '\0');
    size_t readLength = 0;
    if (!source.empty()) // never take the address of an element of an empty string
        readLength = fread(&source[0], 1, source.size(), file);
    fclose(file);
    if (readLength != source.size())
    {
        std::cout << "Failed to read shader file: " << file_name << std::endl;
        exit(1);
    }

    GLuint shader = glCreateShader(shader_type);
    // glShaderSource expects an array of C-string pointers. The address of the
    // std::string object itself is NOT such an array (casting it only works by
    // accident of a particular string layout), so pass a real pointer to the
    // characters together with their exact length.
    const GLchar* sourceText = source.c_str();
    const GLint sourceLength = static_cast<GLint>(source.size());
    glShaderSource(shader, 1, &sourceText, &sourceLength);
    glCompileShader(shader);
    printShaderInfoLog(shader); // exits the process if compilation failed
    return shader;
}
//-----------------------------------------------------------------------------
// Creates a program object containing only `shader`, links it and returns it.
// A link failure is reported by printProgramInfoLog, which exits the process.
GLuint linkProgram(GLuint shader)
{
    GLuint linkedProgram = glCreateProgram();

    glAttachShader(linkedProgram, shader);
    glLinkProgram(linkedProgram);

    printProgramInfoLog(linkedProgram);
    return linkedProgram;
}
//-----------------------------------------------------------------------------
// Creates a program object from two shader stages (attached in the order
// shader1, shader2), links it and returns it.
// A link failure is reported by printProgramInfoLog, which exits the process.
GLuint linkProgram(GLuint shader1, GLuint shader2)
{
    const GLuint stages[2] = { shader1, shader2 };

    GLuint linkedProgram = glCreateProgram();
    for (int s = 0; s < 2; ++s)
        glAttachShader(linkedProgram, stages[s]);
    glLinkProgram(linkedProgram);

    printProgramInfoLog(linkedProgram);
    return linkedProgram;
}
//-----------------------------------------------------------------------------
// CPU-side copy of one texel of the result image: four 32-bit unsigned
// components, matching the RGBA32UI / GL_UNSIGNED_INT readback used in main().
struct pixResult {
    uint32_t v[4]; // printed in order v[0],v[1],v[2],v[3] as the OUTPUT line
};
int main(int argc, char** argv)
{
//-----------------------------------------------------------------------------
// p 1. Initialize GLFW.
if (!glfwInit())
return -1;
// Create a windowed mode window and its OpenGL context
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GLFW_TRUE);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
GLFWwindow* window = glfwCreateWindow(640, 480, "Loop Unroll", NULL, NULL);
if (!window)
{
glfwTerminate();
return -1;
}
// Make the window's context current
glfwMakeContextCurrent(window);
glewInit();
//-----------------------------------------------------------------------------
// p 2. Init a full-screen quad buffers.
GLuint fsQuadVAO;
GLuint fsQuadBuffers[2];
const uint32_t numVertices = 4;
float vertices[numVertices][2];
vertices[0][0] = -1.0f; vertices[0][1] = -1.0f; vertices[1][0] = 1.0f; vertices[1][1] = -1.0f;
vertices[2][0] = 1.0f; vertices[2][1] = 1.0f; vertices[3][0] = -1.0f; vertices[3][1] = 1.0f;
const uint32_t numElements = 6;
const uint16_t elements[numElements] = {0, 1, 2, 2, 3, 0};
glGenVertexArrays(1, &fsQuadVAO);
glGenBuffers(2, fsQuadBuffers);
glBindVertexArray(fsQuadVAO);
glBindBuffer(GL_ARRAY_BUFFER, fsQuadBuffers[0]);
glBufferData(GL_ARRAY_BUFFER, 8*numVertices, vertices, GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, fsQuadBuffers[1]);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(unsigned short)*numElements, elements, GL_STATIC_DRAW);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
glBindVertexArray(0);
//-----------------------------------------------------------------------------
// p 3. Create texture and pbo.
const GLsizei imgRes = 8;
const GLenum internalFormat = GL_RGBA32UI;
const GLsizei numComponents = 4;
const GLsizei sizeOfDataType = sizeof(uint32_t)*numComponents;
// 3.1. texture
GLuint texDynamic;
glGenTextures(1, &texDynamic);
glBindTexture(GL_TEXTURE_2D, texDynamic);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, imgRes, imgRes);
glBindTexture(GL_TEXTURE_2D, 0);
// 3.2. pbo
GLuint pbo;
glGenBuffers(1, &pbo);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
glBufferData(GL_PIXEL_PACK_BUFFER, imgRes * imgRes * sizeOfDataType, 0, GL_STREAM_READ);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
// 3.3 pixel data on the CPU
pixResult* pixels = new pixResult[imgRes*imgRes];
//-----------------------------------------------------------------------------
// p 4. Load shaders.
GLuint vsQuad = createShaderFromFile("quad.vert", GL_VERTEX_SHADER);
GLuint fsCompute = createShaderFromFile("unroll.frag", GL_FRAGMENT_SHADER);
GLuint glslTest = linkProgram(vsQuad, fsCompute);
glDeleteShader(vsQuad);
glDeleteShader(fsCompute);
//-----------------------------------------------------------------------------
// p.5 Computations.
glViewport(0, 0, imgRes, imgRes);
//-------------------------------------
// p.5.1 Do some computations.
glBindImageTexture(0, texDynamic, 0, GL_FALSE, 0, GL_WRITE_ONLY, internalFormat);
// bind a shader program
glUseProgram(glslTest);
// draw a full-screen quad
glBindVertexArray(fsQuadVAO);
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
glBindVertexArray(0);
// unbind
glUseProgram(0);
glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_WRITE_ONLY, internalFormat);
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
//-------------------------------------
// p.5.2. Transfer image to the PBO
// bind pbo and texture
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
glBindTexture(GL_TEXTURE_2D, texDynamic);
glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA_INTEGER, GL_UNSIGNED_INT, (GLvoid*)0);
//-------------------------------------
// p.5.3. Map PBO and copy to the CPU memory
uint32_t* ptr = (uint32_t*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
// copy pbo data to the cpu memory
memcpy(pixels, ptr, imgRes * imgRes * sizeOfDataType);
// unmap buffer
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
//-------------------------------------
// p 5.4 Swap front and back buffers.
glfwSwapBuffers(window);
//-------------------------------------
// p.5.5. Print result for a selected pixel
// Must be:
// OUTPUT: 2101479989,1917184202,4191109983,1779160146
size_t p = 0;
std::cout << "OUTPUT: " << pixels[p].v[0] << "," << pixels[p].v[1] << "," << pixels[p].v[2] << "," << pixels[p].v[3] << std::endl;
//-----------------------------------------------------------------------------
// p 6. Destroy texture, pbo and glsl program
glDeleteProgram(glslTest);
glDeleteTextures(1, &texDynamic);
glDeleteBuffers(1, &pbo);
// delete cpu-side pixel data
delete[] pixels;
//-----------------------------------------------------------------------------
// p 7. Destroy FSQuad
glDeleteVertexArrays(1, &fsQuadVAO);
glDeleteBuffers(2, fsQuadBuffers);
glfwTerminate();
return 0;
}
quad.vert
#version 450 core

// Pass-through vertex stage for the full-screen quad.
// Attribute 0 carries a 2D position that is used directly as clip-space x/y.
layout(location = 0) in vec2 vertexPositionNDC;

void main()
{
    // z = 0 and w = 1: no transform is applied to the incoming position.
    gl_Position = vec4(vertexPositionNDC.x, vertexPositionNDC.y, 0.0, 1.0);
}
unroll.frag
#version 450 core
// Needed for the 64-bit integer vector type (u64vec2) and the
// packUint2x32 / unpackUint2x32 functions used below.
#extension GL_ARB_gpu_shader_int64: enable
// Result image: every fragment stores one uvec4 at its own pixel coordinate.
layout(binding = 0, rgba32ui) writeonly uniform uimage2D dynamicImage;
// Minimal reproduction of a loop mis-compilation.
// Eight constants are packed into 64-bit pairs (st), copied into the array m
// by a loop whose last iteration writes m[0] = st[0], and m[0] is unpacked and
// stored. The stored value should therefore be b[0], b[1], b[2], b[3].
void main()
{
// OUTPUT should be:
// OUTPUT: 2101479989,1917184202,4191109983,1779160146
// Source constants; b[0..3] are the expected output components.
uint b[8];
b[0] = 2101479989U;
b[1] = 1917184202U;
b[2] = 4191109983U;
b[3] = 1779160146U;
b[4] = 2746074432U;
b[5] = 2613207917U;
b[6] = 4045743315U;
b[7] = 4014514165U;
// st[0] = (b0|b1, b2|b3), st[1] = (b4|b5, b6|b7), st[2] is a copy of st[0].
u64vec2 st[3];
st[0].x = packUint2x32(uvec2(b[0], b[1]));
st[0].y = packUint2x32(uvec2(b[2], b[3]));
st[1].x = packUint2x32(uvec2(b[4], b[5]));
st[1].y = packUint2x32(uvec2(b[6], b[7]));
st[2] = st[0];
// Destination array; only m[0] is read back at the end of main().
u64vec2 m[48];
// v1. DOESN'T WORK!
// Produces wrong result:
// OUTPUT: 2101479989,0,4191109983,0
// Should be:
// OUTPUT: 2101479989,1917184202,4191109983,1779160146
//-----------------
// It may work OK depending on the iteration count and "io" variable.
// io takes the values 42, 36, 30, 24, 18, 12, 6, 0 over the eight iterations,
// so each pass writes m[io], m[io+1], m[io+2] and the final pass (io == 0)
// sets m[0] = st[0].
int io = 42;
for (int i = 0; i < 8; ++i)
{
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2]; // Works OK if comment this line
io -= 6;
}
//-----------------
// v2. WORKS!
// copy manually
// (the same eight iterations as v1, written out by hand; kept disabled)
/*int io = 42;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;*/
// Disabled variant v3 below: seven loop iterations starting at io = 36,
// followed by one hand-written copy for io = -6 + 6 = 0.
/*
//-----------------
// v3. WORKS!
int io = 36;
for (int i = 0; i < 7; ++i)
{
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
}
m[io+0] = st[0];
m[io+1] = st[1];
m[io+2] = st[2];
io -= 6;
*/
// Split m[0] back into four 32-bit values and store them at this fragment's
// pixel; with a correct compile these equal b[0], b[1], b[2], b[3].
uvec2 v01 = unpackUint2x32(m[0].x);
uvec2 v23 = unpackUint2x32(m[0].y);
imageStore(dynamicImage, ivec2(gl_FragCoord.xy), uvec4(v01.x, v01.y, v23.x, v23.y));
}
GPU: Nvidia GeForce GTX 690.
Driver Version: 384.98
Do you share the same (or a similar) codebase for this OpenGL extension with the RadeonSI open-source driver?
I’ve had this issue with AMD Radeon HD 7850 in the past and it was fixed.