std430 layout issues (bug?)

Hi, I stumbled upon this weird behavior, and I am not sure, but it might possibly be a driver bug.

If I declare a buffer storage block in my shader
layout (std430, binding = 0) buffer mybuffer
{
vec3 pos;
} ;

, then pull the data with “vec3 vecVertex = pos[iIdx].xyz;”

and I feed it, using glBindBufferBase, with a VBO containing float[3]s, i get incorrect results.
Note that if I render the same VBO using plain old VBOs (glBindBuffer/glEnableClientState(GL_VERTEX_ARRAY) and vec3 vecVertex = gl_Vertex.xyz;) things work as they should.

Things work as they should also if i use SSBOs as I described before before, but with:
layout (std430, binding = 0) buffer mybuffer
{
vec4 pos;
} ;

and the VBO containing float[4]s.

now, at page 888 of the redbook, the author writes about std430:

“An array of scalars or vectors : The size of each element in the array will be the same size of the element type, where three-component vectors are not rounded up to the size of four-component vectors. This is also the array’s alignment. The array’s size will be the element’s size times the number of elements in the array.”

My interpretation is that if I bind a buffer containing only an array of scalar or vectors, this buffer can be packed and the shader will be able to pull elements from it in a non-aligned way.

So the question is: am I experiencing a driver bug here, or did I misunderstand the specs?

ps. I’m on quadro k5000, latest drivers (347.52)

Below a simple example to trigger this problem (in Python/PyQT/PyOpenGL for simplicity)

#! /usr/bin/env python
import numpy as np
from numpy.linalg import norm
import OpenGL
import OpenGL.GL as gl
import OpenGL.GLU as glu
import OpenGL.GLUT as glut
from PyQt4 import QtGui, QtCore
from PyQt4.QtOpenGL import *
from math import *
import sys




    
_vsSphere =  """#version 440 compatibility
#extension GL_ARB_shader_storage_buffer_object : require

smooth out vec4 vecColor;
void main() 
{
    vec3 vecVertex = gl_Vertex.xyz + vec3(0,0,-1);
    gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * vec4(vecVertex,1.0);
    vecColor.rgb = vecVertex.xyz * 0.5 + 0.5; 
    vecColor.a = 1.0;
}
""" 

_fsSphere =   """#version 440 compatibility
smooth in vec4	vecColor;
void main()
{	
	gl_FragColor = vecColor;
}
"""    


_vsSphereSSBO3 =  """#version 440 compatibility
#extension GL_ARB_shader_storage_buffer_object : require

layout (std430, binding = 0) buffer VertexPositionBufferIn
{	
	vec3 pos[];
} ;

smooth out vec4 vecColor;
void main() 
{
    int iIdx = gl_VertexID;
    vec3 vecVertex = pos[iIdx].xyz; + vec3(0,0,-1);
    gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * vec4(vecVertex,1.0);
    vecColor.rgb = vecVertex.xyz * 0.5 + 0.5; 
    vecColor.a = 1.0;
}
""" 


_vsSphereSSBO4 =  """#version 440 compatibility
#extension GL_ARB_shader_storage_buffer_object : require

layout (std430, binding = 0) buffer VertexPositionBufferIn
{	
	vec4 pos[];
} ;

smooth out vec4 vecColor;
void main() 
{
    int iIdx = gl_VertexID;
    vec3 vecVertex = pos[iIdx].xyz; + vec3(0,0,-1);
    gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * vec4(vecVertex,1.0);
    vecColor.rgb = vecVertex.xyz * 0.5 + 0.5; 
    vecColor.a = 1.0;
}
""" 

fEpsilon = 0.0000001
    

mode="va"
mode="vbo"
mode="ssbo4"
#mode="ssbo3"

sphereGeometry = True
    
def onSphere(az,el):
    fProjectionLength = abs(cos(el))
    fProjectionX = cos(az) * fProjectionLength
    if abs(fProjectionX) < fEpsilon:
        fProjectionX = 0.0
    fProjectionZ = -sin(az) * fProjectionLength
    if abs(fProjectionZ) < fEpsilon:
        fProjectionZ = 0.0

    fProjectionX2 = (fProjectionX*fProjectionX)
    fProjectionZ2 = (fProjectionZ*fProjectionZ)
    fProjectionXZ2 = fProjectionX2 + fProjectionZ2
    fProjectionY = sqrt(1.0 - fProjectionXZ2 )
    if (el < 0.0):
        fProjectionY = -fProjectionY
        
    if abs(fProjectionY) < fEpsilon:
        fProjectionY = 0.0        
    point =  (fProjectionX,fProjectionY,fProjectionZ)
    return point


class Viewer3DWidget(QGLWidget):

    def __init__(self, parent):
        QGLWidget.__init__(self, parent)
       
        self.uWidth = 0
        self.uHeight = 0
        self.iVertexSize = 3
        if (mode is "ssbo4"):
            self.iVertexSize = 4
        self.vbo       = None
        
        self.vecBackgroundColor = (1.,1.,1.)
        self.iSubdivision = 3
        self.data = np.zeros((1,self.iVertexSize),dtype=np.float32)
        self.dataLinear = self.data.reshape(self.data.size)
        self.bRegenerate = True
        self.iCnt = 0

        
        
    def setSubdivision(self, iVal):
       
        self.iSubdivision = iVal
        self.bRegenerate = True
        self.update()

    def regenerateGeometry(self):
        if (not self.bRegenerate):
            return
        else:
            self.bRegenerate = False
            
        if sphereGeometry:            
            iTotalTriangles =  (self.iSubdivision - 2)*(self.iSubdivision *2) #+ (self.iSubdivision * 2)  # skipping the top and the bottom
            self.data = np.ones((iTotalTriangles * 3,self.iVertexSize),dtype=np.float32) #ones to get the last component of vec4 to 1
            fAzSlab = 2.0 * pi / float(self.iSubdivision)
            fElSlab = pi / float(self.iSubdivision)
            
            self.iCnt = 0
            for i in range(1,self.iSubdivision-1):
                for j in range(self.iSubdivision):
                    fAz0 = fAzSlab * j
                    fAz1 = fAzSlab * (j + 1)
                    fEl0 = -(fElSlab * i) + pi * 0.5
                    fEl1 = -(fElSlab * (i + 1)) + pi * 0.5
    
                    point = onSphere(fAz0,fEl1)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])
                    self.iCnt += 1
                    point = onSphere(fAz1,fEl1)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])                
                    self.iCnt += 1
                    point = onSphere(fAz1,fEl0)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])                
                    self.iCnt += 1
                    point = onSphere(fAz1,fEl0)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])                
                    self.iCnt += 1
                    point = onSphere(fAz0,fEl0)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])                
                    self.iCnt += 1
                    point = onSphere(fAz0,fEl1)
                    self.data[self.iCnt,0] = float(point[0])
                    self.data[self.iCnt,1] = float(point[1])
                    self.data[self.iCnt,2] = float(point[2])                
                    self.iCnt += 1
            
            self.dataLinear = self.data.reshape(self.data.size)
            
            print self.iCnt, self.dataLinear.size, self.dataLinear.shape
        
        else:
                         
            self.data = np.ones((3,self.iVertexSize),dtype=np.float32)
            self.data[0,0] = -0.5
            self.data[0,1] = -0.5
            self.data[0,2] = 0.0
            
            self.data[1,0] = 0.5
            self.data[1,1] = -0.5
            self.data[1,2] = 0.0
            
            self.data[2,0] = 0.0
            self.data[2,1] = 0.5
            self.data[2,2] = 0.0
            
            self.dataLinear = self.data.reshape(self.data.size)
                         
                         
        if (self.vbo is None):
            self.vbo = gl.glGenBuffers(1)
 
        
        
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self.vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self.dataLinear, gl.GL_DYNAMIC_DRAW)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, gl.GLuint(0))
#                         


    def initializeGL(self):    
        
        self.shaShaderSphere = QGLShaderProgram(self.context())
        self.shaShaderSphere.addShaderFromSourceCode(QGLShader.Vertex, _vsSphere)
        self.shaShaderSphere.addShaderFromSourceCode(QGLShader.Fragment, _fsSphere)   
        self.shaShaderSphere.link()   
        
        self.shaShaderSphereSSBO3 = QGLShaderProgram(self.context())
        self.shaShaderSphereSSBO3.addShaderFromSourceCode(QGLShader.Vertex, _vsSphereSSBO3)
        self.shaShaderSphereSSBO3.addShaderFromSourceCode(QGLShader.Fragment, _fsSphere)   
        self.shaShaderSphereSSBO3.link()  
        
        self.shaShaderSphereSSBO4 = QGLShaderProgram(self.context())
        self.shaShaderSphereSSBO4.addShaderFromSourceCode(QGLShader.Vertex, _vsSphereSSBO4)
        self.shaShaderSphereSSBO4.addShaderFromSourceCode(QGLShader.Fragment, _fsSphere)   
        self.shaShaderSphereSSBO4.link()  

        gl.glClearColor(0, 0, 0, 1.0)
        gl.glClearDepth(1.0)

    def paintGL(self):
        if  (self.uWidth is 0):
            return           
        self.regenerateGeometry()    
                
        gl.glMatrixMode( gl.GL_PROJECTION )
        gl.glLoadIdentity()
        gl.glOrtho(-2,2,-2,2,0,3)

        gl.glMatrixMode( gl.GL_MODELVIEW );
        gl.glLoadIdentity();

#        glDisable(GL_BLEND)
#        glDisable(GL_DEPTH_TEST);  
#        glDisable(GL_LIGHTING)
        
        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
        gl.glColor(1.0, 1.0,1.0)  
        
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glDepthFunc(gl.GL_LESS);
        
        
        if (mode == "vbo"):
            self.drawSphereVBO()
        elif(mode == "va"):
            self.drawSphereArray()
        elif(mode == "ssbo3"):
            self.drawSphereSSBO3()
        elif(mode == "ssbo4"):
            self.drawSphereSSBO4()

        
    def drawSphereArray(self):  
        shaShader = self.shaShaderSphere
        shaShader.bind()
        gl.glEnableClientState(gl.GL_VERTEX_ARRAY); 
        gl.glVertexPointer(3, gl.GL_FLOAT, 0, self.dataLinear);
        #gl.glDrawArrays(gl.GL_TRIANGLES,0,self.iCnt);
        gl.glDrawArrays(gl.GL_TRIANGLES,0,self.dataLinear.size  / 3);
        gl.glDisableClientState(gl.GL_VERTEX_ARRAY);     
        shaShader.release()
        

    def drawSphereVBO(self):
        shaShader = self.shaShaderSphere
        shaShader.bind()
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self.vbo)
        gl.glEnableClientState(gl.GL_VERTEX_ARRAY);
        gl.glVertexPointer(3, gl.GL_FLOAT, 0, None);
        
        gl.glDrawArrays(gl.GL_TRIANGLES,0,self.dataLinear.size  / 3);
        
        gl.glDisableClientState(gl.GL_VERTEX_ARRAY);
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER,gl.GLuint(0))
        shaShader.release()
        
    def drawSphereSSBO3(self):
        shaShader = self.shaShaderSphereSSBO3
        shaShader.bind()
        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, self.vbo);
        gl.glDrawArrays(gl.GL_TRIANGLES,0,self.dataLinear.size  / 3);
        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, 0);        
        shaShader.release()
        
    def drawSphereSSBO4(self):
        shaShader = self.shaShaderSphereSSBO4
        shaShader.bind()
        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, self.vbo);
        gl.glDrawArrays(gl.GL_TRIANGLES,0,self.dataLinear.size  / 4);
        gl.glBindBufferBase(gl.GL_SHADER_STORAGE_BUFFER, 0, 0);   
        shaShader.release()     
        

    def resizeGL(self, widthInPixels, heightInPixels):
        if ((widthInPixels is not self.uWidth) or (heightInPixels is not self.uHeight)):        
            self.uWidth = widthInPixels
            self.uHeight = heightInPixels

            gl.glViewport(0, 0, widthInPixels, heightInPixels)
            self.update()





class TestSSBOs(QtGui.QMainWindow):
    def __init__(self):
        QtGui.QMainWindow.__init__(self)
        self.setWindowTitle('TestSSBOs')
        self.statusBar().showMessage("Hello there")

        exit = QtGui.QAction("Exit", self)
        exit.setShortcut("Ctrl+Q")
        exit.setStatusTip('Exit application')
        self.connect(exit, QtCore.SIGNAL('triggered()'), QtCore.SLOT('close()'))


        self.setToolTip('This is a window, or <b>something</b>')

        self.viewer3D = Viewer3DWidget(self)

        parentWidget = QtGui.QWidget()
        slider1 = QtGui.QSlider(QtCore.Qt.Horizontal, None)
        slider1.setRange(3,50)
        slider1.setValue(0)
        slider1.setMaximumWidth(120)
        slider1.valueChanged.connect(self.slider1Handler)
        vbox = QtGui.QVBoxLayout()
        vbox.addWidget(slider1)
        vbox.addStretch(1)
        self.viewer3D.setSizePolicy( QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Expanding )
        hbox = QtGui.QHBoxLayout()
        hbox.addLayout(vbox)
        hbox.addWidget(self.viewer3D)

        parentWidget.setLayout(hbox)        
        self.setCentralWidget(parentWidget)
        

        self.resize(500,500)

    def closeEvent(self, event):
        event.accept()
    def slider1Handler(self, iVal):    
        self.viewer3D.setSubdivision(iVal)           

if __name__ == '__main__':
    # app = QtGui.QApplication(['Python Qt OpenGL Demo'])
    app = QtGui.QApplication(sys.argv)
    window = TestSSBOs()
    window.show()
    sys.exit(app.exec_())