Under some circumstances, all data in a shader storage buffer gets replaced with zeros. I found four(!!!) factors, each of which heals the bug (all are marked in the code).
#version 440 core
// N will be defined by the Python script
#define N {N}

layout (local_size_x = 1) in;

struct Invocation {
    float rowSums[N];
};

layout (std430, binding = 1) buffer BufferObject {
    Invocation[] data;
};

layout (std430, binding = 2) buffer Test {
    float test;
};

float erfcx(float x) {
    const float ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi)
    if (x >= 0.0) {
        return ispi / 3;
    } else {
        return -ispi / 3;
    }
}

void main() {
    for (int i = 0; i < N; i++) {
        float rowSum = 0.25;
        for (int j = 0; j < N; j++) {
            rowSum = erfcx(i / 1000.0); // moving this out of the inner loop solves the problem
        }
        data[0].rowSums[i] = rowSum;
    }
}
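For reference, the host code below sizes the first SSBO with struct.calcsize. If I read the std430 rules correctly, a float array member is tightly packed with a 4-byte stride, so a single Invocation should occupy exactly 4*N bytes and the CPU-side format string matches the GPU layout. A quick sketch of that assumption (N = 428, as in the script):

import struct

N = 428  # same N the script substitutes into the shader

# std430: a float array member has a 4-byte stride, so Invocation is N tightly
# packed floats with no trailing padding (unlike std140, which would round the
# array stride up to 16 bytes).
expected_std430_size = 4 * N

invocation_fmt = '={N}f'.format(N=N)           # same format string as in the script
packed_size = struct.calcsize(invocation_fmt)  # size of the CPU-side representation

assert packed_size == expected_std430_size     # 1712 bytes for N = 428
print(packed_size)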
And here is the invocation code:
#!/usr/bin/env python3
import sys
try:
    from OpenGL.GLUT import *
    from OpenGL.GL import *
    import OpenGL.GL.shaders as GLSL
except ImportError:
    print('''
ERROR: PyOpenGL not installed properly.
''')
    sys.exit()
import ctypes
import struct
import random

# Decrementing N to 427 solves the bug
N = 428
# Decrementing the number of iterations solves the bug
iterations = 999

def load(filename):
    with open(filename, 'r') as f:
        return f.read().replace("{N}", str(N))

def test():
    print(glGetString(GL_VERSION))

    # loading and compiling the shader
    testSource = load('test.glsl')
    testProgram = GLSL.compileProgram(GLSL.compileShader(testSource, GL_COMPUTE_SHADER))

    # creating the first buffer
    invocationsBuffer = glGenBuffers(1)
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, invocationsBuffer)

    # helper variables for passing the Invocation structure
    invocationFmt = '={N}f'.format(N=N)
    invocationSize = struct.calcsize(invocationFmt)

    # setting data in the first buffer
    glBufferData(GL_SHADER_STORAGE_BUFFER, invocationSize, None, GL_DYNAMIC_COPY)
    data = struct.pack(invocationFmt,
        *[i + 0.2 for i in range(N)]
    )
    glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data)

    # creating the second buffer
    testBuffer = glGenBuffers(1)
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, testBuffer)
    glBufferData(GL_SHADER_STORAGE_BUFFER, 4, None, GL_DYNAMIC_COPY)

    # binding the program and buffers
    glUseProgram(testProgram)
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, invocationsBuffer)
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, testBuffer)

    for i in range(iterations):
        # updating data in the second buffer
        if True:  # disabling this solves the problem
            data = bytes([random.randrange(0, 256), random.randrange(0, 256),
                          random.randrange(0, 256), random.randrange(0, 256)])
            glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data)
        glDispatchCompute(1, 1, 1)
        print(i)
        if i == iterations - 1:
            # reading data back from the first buffer
            invocationData = (ctypes.c_byte * invocationSize)()
            glBindBuffer(GL_SHADER_STORAGE_BUFFER, invocationsBuffer)
            glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, invocationSize, ctypes.byref(invocationData))
            print(struct.unpack(invocationFmt, bytes(invocationData)))

glutInit(sys.argv)
glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB)
win = glutCreateWindow("hello")
glutHideWindow()
test()
glutDestroyWindow(win)
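One thing I am not sure about: the readback at the end relies on glGetBufferSubData seeing the shader writes without an explicit barrier. In case implicit synchronization plays any role here, this is a minimal sketch of the same readback with glMemoryBarrier in front of it (same buffer and variable names as in the listing above; purely an assumption to rule things out, not a claim that this is the cause):

# Assumption: make the SSBO writes explicitly visible to buffer readback.
# GL_BUFFER_UPDATE_BARRIER_BIT covers glGetBufferSubData.
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT)
glBindBuffer(GL_SHADER_STORAGE_BUFFER, invocationsBuffer)
invocationData = (ctypes.c_byte * invocationSize)()
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, invocationSize, ctypes.byref(invocationData))
print(struct.unpack(invocationFmt, bytes(invocationData)))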
Though I develop under Linux (using Bumblebee, if that matters), a test run under Windows (driver 347.09) ended even "better": no zeroed buffer was printed at all, the application simply hung.
Here is my nvidia-bug-report.log.gz
I hope you won't ignore it like you did this one.
It looks like the zeros in the buffer are the result of the shader execution hanging. Yet I can't see why it would hang.
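To check whether a particular dispatch is the one that never finishes, one option (my own addition, not part of the original script) is to put a fence sync after glDispatchCompute and wait on it with a timeout; if glClientWaitSync keeps returning GL_TIMEOUT_EXPIRED, that dispatch is hanging:

# Sketch: detect a hanging dispatch with a fence sync (hypothetical addition
# to the loop above; timeout_ns and the loop index i are assumed from context).
glDispatchCompute(1, 1, 1)
sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0)
timeout_ns = 2 * 1000 * 1000 * 1000  # 2 seconds
status = glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, timeout_ns)
if status == GL_TIMEOUT_EXPIRED:
    print('dispatch', i, 'did not finish within 2 s')
glDeleteSync(sync)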
Seems like it is fixed on Windows by 347.25. I'm going to test my original code now.
Yet the problem in the following post persists.