imageAtomicAdd ?

Hi there,

Could anyone confirm or deny that imageAtomicAdd works from inside
a fragment shader on an NVIDIA GPU, please? At this point I'm inclined to say
that the call is simply ignored.

  1. Same code works for an Intel HD GPU.
  2. NVIDIA GameWorks Graphics Samples and nvpro samples from NVIDIA have no mention
    of this call.

NVIDIA Quadro M1000M, driver 352.63, Ubuntu 14.04.

Expected output would be
Geschlossen 15.6. 50000, 13

Good luck to us,
Mikhail

Code is:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#include <vector>

// #include <GL/glext.h>
// #include <GL/glew.h>

#include <epoxy/gl.h>
#include <epoxy/glx.h>

#include <GL/glut.h>
#include <GL/freeglut_ext.h>

#include <opencv2/highgui/highgui.hpp>

static void checkError(){
  GLenum r = glGetError();
  if(r != GL_NO_ERROR){
    fprintf(stderr, "Aborting, error #%d: '%s'\n",
      (int)r, gluErrorString(r));
    abort();
  }
}

// Geometry for two triangles, three floats (x, y, z) per vertex.
// The first triangle sits at z = 0.5, the second at z = -0.5.
static float vertex_array[] = {
  // first triangle
  150.0f,  75.0f,  0.5f,
   50.0f, 325.0f,  0.5f,
  250.0f, 325.0f,  0.5f,
  // second triangle
  200.0f, 125.0f, -0.5f,
  100.0f, 375.0f, -0.5f,
  300.0f, 375.0f, -0.5f};

// One colour triple per vertex, in the same order as vertex_array:
// (1,1,0) for the first triangle, (0,0,1) for the second.
static float color_array[] = {
  1.0f, 1.0f, 0.0f,
  1.0f, 1.0f, 0.0f,
  1.0f, 1.0f, 0.0f,
  0.0f, 0.0f, 1.0f,
  0.0f, 0.0f, 1.0f,
  0.0f, 0.0f, 1.0f};

/* Redisplay handler: clears the colour buffer, draws every vertex stored in
   vertex_array as GL_TRIANGLES and flushes the command stream.  Only the
   colour buffer is cleared here. */
static void displayCB(void)
{
  /* vertex_array holds three floats per vertex. */
  int const vertex_count = sizeof(vertex_array) / sizeof(float) / 3;

  glClear(GL_COLOR_BUFFER_BIT);
  glDrawArrays(GL_TRIANGLES, 0, vertex_count);
  glFlush();
}

/* Keyboard handler: exits the program with status 0 when 'q' is pressed.
   Every other key is ignored; the x/y arguments are not used. */
static void keyCB(unsigned char key, int x, int y)
{
  switch(key){
    case 'q':
      exit(0);
    default:
      break;
  }
}


static void reportCompileError(int shader){
    GLint maxLength = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);

    // The maxLength includes the NULL character
    std::vector<GLchar> errorLog(maxLength);
    glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]);

    fwrite(&errorLog[0], sizeof(GLchar), maxLength, stderr);

    // Provide the infolog in whatever manor you deem best.
    // Exit with failure.
    glDeleteShader(shader); // Don't leak the shader.
}

// Builds an orthographic projection matrix, following
// http://en.wikipedia.org/wiki/Orthographic_projection_(geometry)
//
// mat:    receives 16 floats, written column by column.
// left, right, bottom, top: edges of the viewing volume in x and y.
//
// The z range is fixed to [-1, 1].  No check is made for right == left or
// top == bottom.
void MyOrtho2D(float* mat, float left, float right, float bottom, float top)
{
    const float zNear = -1.0f;
    const float zFar = 1.0f;

    const float width_rcp = 1.0f / (right - left);
    const float height_rcp = 1.0f / (top - bottom);
    const float depth_rcp = 1.0f / (zFar - zNear);

    // Start from an all-zero matrix, then fill in the non-zero entries.
    for(int i = 0; i < 16; ++i){
        mat[i] = 0.0f;
    }

    // Diagonal: per-axis scale.
    mat[0] = 2.0f * width_rcp;
    mat[5] = 2.0f * height_rcp;
    mat[10] = -2.0f * depth_rcp;

    // Fourth column: translation, plus the homogeneous 1.
    mat[12] = -(right + left) * width_rcp;
    mat[13] = -(top + bottom) * height_rcp;
    mat[14] = -(zFar + zNear) * depth_rcp;
    mat[15] = 1.0f;
}

main(int argc, char *argv[]){
	printf("Hello, World!\n");

  int const width = 400;
  int const height = 500;

  int win;


  glutInit(&argc, argv);                /* initialize GLUT system */
  printf("Geschlossen 1\n");

  glutInitDisplayMode(GLUT_RGB);

  // glutInitContextVersion(3,3);
  glutInitContextVersion(4,3);
  // glutInitContextProfile(GLUT_CORE_PROFILE);

  glutInitWindowSize(width, height);		/* width=width pixels height=height pixels */
  win = glutCreateWindow("Triangle");	/* create window */

  /* from this point on the current window is win */
  printf("Geschlossen 2, glGenFramebuffers %p\n", glGenFramebuffers);

  printf(
   "Running OpenGL version %s\n",
   glGetString(GL_VERSION));

  printf(
    "Running GLSL version %s\n",
    glGetString(GL_SHADING_LANGUAGE_VERSION));

  // printf(
  //   "Extensions available: %s\n",
  //   glGetString(GL_EXTENSIONS));

  GLuint FramebufferName = 0;
  glGenFramebuffers(1, &FramebufferName);
  checkError();
  printf("Geschlossen 3\n");

  glBindFramebuffer(GL_FRAMEBUFFER, FramebufferName);
  checkError();

  printf("Geschlossen 4\n");

  GLuint renderedTexture;
  glGenTextures(1, &renderedTexture);
  checkError();
  printf("Geschlossen 5\n");

  glBindTexture(GL_TEXTURE_2D, renderedTexture);
  checkError();
  printf("Geschlossen 6\n");

 // Give an empty image to OpenGL ( the last "0" )
  glTexImage2D(
    GL_TEXTURE_2D,
    0,
    GL_RGB,
    width, height,
    0,
    GL_RGB,
    GL_UNSIGNED_BYTE,
    0);
  checkError();
  printf("Geschlossen 7\n");


  // Set "renderedTexture" as our colour attachement #0
  // glFramebufferTetxure only available from OpenGL 3.2.  I'm running 3.0.  Lazy to figure out why.
  // glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, renderedTexture, 0);
  glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, renderedTexture, 0);
  checkError();
  printf("Geschlossen 8\n");

  // Set the list of draw buffers.
  GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
  printf("Geschlossen 9\n");

  glDrawBuffers(1, DrawBuffers); // "1" is the size of DrawBuffers
  checkError();
  printf("Geschlossen 10\n");

  GLuint depthTexture;
  glGenTextures(1, &depthTexture);
  checkError();
  printf("Geschlossen 5.1\n");

  glBindTexture(GL_TEXTURE_2D, depthTexture);
  checkError();
  printf("Geschlossen 6.1\n");

  std::vector<float> depth(width*height, 2);

  glTexImage2D(
    GL_TEXTURE_2D,
    0,
    GL_DEPTH_COMPONENT,
    width, height,
    0,
    GL_DEPTH_COMPONENT,
    GL_FLOAT,
    &depth[0]);
  checkError();
  printf("Geschlossen 7.1\n");

  glFramebufferTexture2D(
    GL_FRAMEBUFFER,
    GL_DEPTH_ATTACHMENT,
    GL_TEXTURE_2D,
    depthTexture,
    0);
  checkError();
  printf("Geschlossen 8.2\n");

  GLuint uniform_buffer;
  glGenBuffers(1, &uniform_buffer);
  checkError();
  printf("Geschlossen 8.3\n");

  glBindBuffer(
    /* GL_ATOMIC_COUNTER_BUFFER */ GL_UNIFORM_BUFFER,
    uniform_buffer);
  checkError();
  printf("Geschlossen 8.4\n");

  unsigned int counter = 0;
  // glBufferStorage(
  //   GL_UNIFORM_BUFFER,
  //   sizeof(counter),
  //   &counter,
  //   0);  
  glBufferData(
    /* GL_ATOMIC_COUNTER_BUFFER */ GL_UNIFORM_BUFFER,
    sizeof(counter),
    &counter,
    GL_DYNAMIC_READ);
  checkError();
  printf("Geschlossen 8.5\n");

  // XXX. OK, I don't quite understand, but I shall experiment later.
  // Why am I to unbind, then bindbase?  abd what is the difference
  // between bind and bindbase?
  glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
  checkError();
  printf("Geschlossen 8.6\n");

  glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, uniform_buffer);

  GLuint fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
  checkError();
  printf("Geschlossen 9.1\n");

  

  char const *fragment_shader_source = "\
#version 430\n\
#extension GL_ARB_shader_atomic_counters: require\n\
#extension GL_ARB_shader_image_load_store: require\n\
/* #extension GL_ARB_shader_image_load_store: enable */\n\
\n\
in vec3 vertex_color;\n\
\n\
layout(binding = 0) uniform atomic_uint counter;\n\
layout(/* binding = 1, */ r32ui) uniform /* volatile  */ /* coherent */ uimage1D counters;\n\
\n\
void main(){\n\
  /* gl_FragColor = gl_Color; */\n\
  /* gl_FragColor = vec4(0.3, 0.4, 0.8, 1.0); */\n\
  atomicCounterIncrement(counter);\n\
  /* imageAtomicAdd(counters, int(vertex_color.b), 1u); */\n\
  imageAtomicAdd(counters, 0, 1u);\n\
  gl_FragColor = vec4(vertex_color, 1);\n\
  gl_FragDepth = gl_FragCoord.z; /* Checking if modification makes difference ;) */\n\
}\n\
";
  glShaderSource(
    fragment_shader,
    1,
    &fragment_shader_source,
    0);
  checkError();
  printf("Geschlossen 9.2\n");

  glCompileShader(fragment_shader);
  checkError();

  GLint isCompiled = 0;
  glGetShaderiv(fragment_shader, GL_COMPILE_STATUS, &isCompiled);
  checkError();
  if(isCompiled == GL_FALSE)
  {
    reportCompileError(fragment_shader);
    return 1;
  }

  printf("Geschlossen 9.3\n");

  GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER);
  checkError();
  printf("Geschlossen 9.31\n");

  char const *vertex_shader_source = "\
#version 430\n\
\n\
uniform mat4 a_mat;\n\
/* layout(location = 1) */ in vec3 position;\n\
in vec3 color;\n\
out vec3 vertex_color;\n\
\n\
void main(){\n\
  gl_Position = a_mat * vec4(position, 1);\n\
  vertex_color = color;\n\
}\n\
";

  glShaderSource(
    vertex_shader,
    1,
    &vertex_shader_source,
    0);
  checkError();
  printf("Geschlossen 9.32\n");

  glCompileShader(vertex_shader);
  checkError();

  glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &isCompiled);
  if(isCompiled == GL_FALSE)
  {
    reportCompileError(vertex_shader);
    return 1;
  }

  printf("Geschlossen 9.33\n");

  GLuint program = glCreateProgram();
  checkError();
  printf("Geschlossen 9.4\n");

  glAttachShader(program, vertex_shader);
  checkError();
  printf("Geschlossen 9.45\n");

  glAttachShader(program, fragment_shader);
  checkError();
  printf("Geschlossen 9.5\n");

  glLinkProgram(program);
  checkError();
  printf("Geschlossen 9.6\n");

  glUseProgram(program);
  checkError();
  printf("Geschlossen 9.7\n");

  glEnable(GL_DEPTH_TEST);
  checkError();
  printf("Geschlossen 10.1\n");

  glViewport(0,0,width ,height); // Render on the whole framebuffer, complete from the lower left corner to the upper right
  checkError();
  printf("Geschlossen 12\n");

  glClearColor(0.0,0.0,0.0,0.0);	/* set background to black */
  checkError();
  printf("Geschlossen 13\n");

  // gluOrtho2D(0,width, 0,height);		/* how object is mapped to window */
  // glOrtho(0,width, 0,height, -1, 1);    /* how object is mapped to window */
  //now create a projection matrix
  float my_proj_matrix[16];
  MyOrtho2D(my_proj_matrix, 0.0f, width, 0.f, height);

  //"uProjectionMatrix" refers directly to the variable of that name in 
  // shader source
  GLint my_projection_ref = 
      glGetUniformLocation(program, "a_mat");
  checkError();
  printf("Geschlossen 13.1  my_projection_ref = %d\n", my_projection_ref);

  // send our projection matrix to the shader
  glUniformMatrix4fv(
    my_projection_ref, // location, retrieved by glGetUniformLocation
    1,                 // just one matrix
    GL_FALSE,          // not transposed
    my_proj_matrix );  // data.
  checkError();
  printf("Geschlossen 13.2\n");

  // create a vertex buffer (This is a buffer in video memory)
  GLuint my_vertex_buffer;
  glGenBuffers(1 /*ask for one buffer*/, &my_vertex_buffer);
  checkError();
  printf("Geschlossen 13.3\n");

  // GL_ARRAY_BUFFER indicates we're using this for 
  // vertex data (as opposed to things like feedback, index, or texture data)
  // so this call says use my_vertex_data as the vertex data source
  // this will become relevant as we make draw calls later 
  glBindBuffer(GL_ARRAY_BUFFER, my_vertex_buffer);
  checkError();
  printf("Geschlossen 13.4\n");

  // allocate some space for our buffer
  glBufferData(GL_ARRAY_BUFFER, sizeof(vertex_array), vertex_array, GL_STATIC_DRAW);
  checkError();
  printf("Geschlossen 13.5\n");

  GLuint my_vao;
  glGenVertexArrays(1, &my_vao);
  checkError();
  printf("Geschlossen 13.6\n");

  //lets use the VAO we created
  glBindVertexArray(my_vao);
  checkError();
  printf("Geschlossen 13.7\n");

  // now we need to tell the VAO how the vertices in my_vertex_buffer
  // are structured
  // our vertices are really simple: each one has 2 floats of position data
  // they could have been more complicated (texture coordinates, color -- 
  // whatever you want)

  // enable the first attribute in our VAO
  glEnableVertexAttribArray(0);  // out VAO si chosen because it is currently bound.
  checkError();
  printf("Geschlossen 13.8\n");

  // describe what the data for this attribute is like
  glVertexAttribPointer(0, // the index we just enabled
      3, // the number of components (our two position floats) 
      GL_FLOAT, // the type of each component
      false, // should the GL normalize this for us?
      3 * sizeof(float), // number of bytes until the next component like this
      (void*)0); // the offset into GL_ARRAY_BUFFER target where this element starts
  checkError();
  printf("Geschlossen 13.9\n");


  // create a vertex buffer (This is a buffer in video memory)
  GLuint my_color_buffer;
  glGenBuffers(1 /*ask for one buffer*/, &my_color_buffer);
  checkError();
  printf("Geschlossen 14.3\n");

  // GL_ARRAY_BUFFER indicates we're using this for 
  // vertex data (as opposed to things like feedback, index, or texture data)
  // so this call says use my_vertex_data as the vertex data source
  // this will become relevant as we make draw calls later 
  glBindBuffer(GL_ARRAY_BUFFER, my_color_buffer);
  checkError();
  printf("Geschlossen 14.4\n");

  // allocate some space for our buffer
  glBufferData(GL_ARRAY_BUFFER, sizeof(color_array), color_array, GL_STATIC_DRAW);
  checkError();
  printf("Geschlossen 14.5\n");

  // enable the first attribute in our VAO
  glEnableVertexAttribArray(1);  // out VAO si chosen because it is currently bound.
  checkError();
  printf("Geschlossen 14.8\n");

  // describe what the data for this attribute is like
  glVertexAttribPointer(1, // the index we just enabled
      3, // the number of components (our two position floats) 
      GL_FLOAT, // the type of each component
      false, // should the GL normalize this for us?
      3 * sizeof(float), // number of bytes until the next component like this
      (void*)0); // the offset into GL_ARRAY_BUFFER target where this element starts
  checkError();
  printf("Geschlossen 14.9\n");

  // OK, let's set it through uniform.
  GLint counters_ref = 
      glGetUniformLocation(program, "counters");
  checkError();
  printf("Geschlossen 14.91  counters_ref = %d\n", counters_ref);

  GLuint countersTexture;
  glGenTextures(1, &countersTexture);
  checkError();
  printf("Geschlossen 14.921.  conutersTexture %d\n", countersTexture);

  glBindTexture(GL_TEXTURE_1D, countersTexture);
  checkError();
  printf("Geschlossen 14.922\n");

  std::vector<uint32_t> counter_vector(2, 0);

  // Can we write there?
  counter_vector[1] = 13;

  glTexImage1D(
    GL_TEXTURE_1D,
    0,
    GL_R32UI,
    2,  // two elements
    0,
    GL_RED_INTEGER,
    GL_UNSIGNED_INT,
    &counter_vector[0]);
  checkError();
  printf("Geschlossen 14.923\n");

  // Read counter in;
  glGetBufferSubData(
    GL_ATOMIC_COUNTER_BUFFER,
    0,
    sizeof(counter),
    &counter);
  checkError();
  printf("Geschlossen 14.95.  counter is %d\n", counter);

  int const counters_unit = 0;

  glUniform1i(counters_ref, counters_unit);
  checkError();
  printf("Geschlossen 14.955.\n");

  glBindImageTexture(
    counters_unit,
    countersTexture,
    0,
    GL_FALSE,
    0,
    GL_READ_WRITE,    
    GL_R32UI);
  checkError();
  printf("Geschlossen 14.96\n");

  // Making sure we are actually reading someting :)
  counter_vector[0] = counter_vector[1] = 1;

  glGetTexImage(
    GL_TEXTURE_1D,
    0,
    GL_RED_INTEGER,
    GL_UNSIGNED_INT,
    &counter_vector[0]);    
  checkError();
  printf("Geschlossen 14.97. %d, %d\n", counter_vector[0], counter_vector[1]);

  // This gives an invalid argument error.
  // glEnable(GL_TEXTURE_1D);
  // checkError();
  // printf("Geschlossen 14.98.\n");

  displayCB();
  checkError();
  printf("Geschlossen 15\n");

  // Desperation?  Yes, did not help.
  glMemoryBarrier(GL_ALL_BARRIER_BITS);
  checkError();

  // Checking if this is a hardware limitation...
  GLint max_image_uniforms = -26;
  glGetIntegerv(GL_MAX_FRAGMENT_IMAGE_UNIFORMS, &max_image_uniforms);
  checkError();
  printf("Max fragment image uniforms %d\n", max_image_uniforms);

  // Read counter in;
  glGetBufferSubData(
    GL_ATOMIC_COUNTER_BUFFER,
    0,
    sizeof(counter),
    &counter);
  checkError();
  printf("Geschlossen 15.5.  counter is %d\n", counter);

  // Making sure we are actually reading someting :)
  counter_vector[0] = counter_vector[1] = 1;

  glGetTexImage(
    GL_TEXTURE_1D,
    0,
    GL_RED_INTEGER,
    GL_UNSIGNED_INT,
    &counter_vector[0]);    
  checkError();
  printf("Geschlossen 15.6. %d, %d\n", counter_vector[0], counter_vector[1]);

  // Read stuff in
  cv::Mat image(height, width , CV_8UC4);
  printf("Geschlossen 16\n");

  // glReadBuffer(GL_BACK);
  // checkError();
  printf("Geschlossen 17\n");


  glReadPixels(0,0,width,height,GL_BGRA,GL_UNSIGNED_BYTE,&image.at<cv::Vec4b>(0,0));
  checkError();
  printf("Geschlossen 18\n");

  cv::imwrite("result.png", image);
  printf("Geschlossen 19\n");

  // glutDisplayFunc(displayCB);		/* set window's display callback */
  // glutKeyboardFunc(keyCB);		/* set window's key callback */

  // glutMainLoop();			/* start processing events... */

  /* execution never reaches this point */

  return 0;
}

Output:

Hello, World!
Geschlossen 1
Geschlossen 2, glGenFramebuffers 0x7efc2a5e7960
Running OpenGL version 4.3.0 NVIDIA 352.63
Running GLSL version 4.30 NVIDIA via Cg compiler
Geschlossen 3
Geschlossen 4
Geschlossen 5
Geschlossen 6
Geschlossen 7
Geschlossen 8
Geschlossen 9
Geschlossen 10
Geschlossen 5.1
Geschlossen 6.1
Geschlossen 7.1
Geschlossen 8.2
Geschlossen 8.3
Geschlossen 8.4
Geschlossen 8.5
Geschlossen 8.6
Geschlossen 9.1
Geschlossen 9.2
Geschlossen 9.3
Geschlossen 9.31
Geschlossen 9.32
Geschlossen 9.33
Geschlossen 9.4
Geschlossen 9.45
Geschlossen 9.5
Geschlossen 9.6
Geschlossen 9.7
Geschlossen 10.1
Geschlossen 12
Geschlossen 13
Geschlossen 13.1  my_projection_ref = 0
Geschlossen 13.2
Geschlossen 13.3
Geschlossen 13.4
Geschlossen 13.5
Geschlossen 13.6
Geschlossen 13.7
Geschlossen 13.8
Geschlossen 13.9
Geschlossen 14.3
Geschlossen 14.4
Geschlossen 14.5
Geschlossen 14.8
Geschlossen 14.9
Geschlossen 14.91  counters_ref = 2
Geschlossen 14.921.  conutersTexture 3
Geschlossen 14.922
Geschlossen 14.923
Geschlossen 14.95.  counter is 0
Geschlossen 14.955.
Geschlossen 14.96
Geschlossen 14.97. 0, 13
Geschlossen 15
Max fragment image uniforms 8
Geschlossen 15.5.  counter is 50000
Geschlossen 15.6. 0, 13
Geschlossen 16
Geschlossen 17
Geschlossen 18
Geschlossen 19