Possible Nvidia GLSL shader compiler problem?

Hello,

I think I may have found a problem in Nvidia’s GLSL shader compiler on Windows.

In this GLSL snippet:

int val_i;
...
val_i = floatBitsToInt( -( intBitsToFloat( val_i ) ) );

the result of val_i contains the integer representation of the negated integer representation of the source argument instead of the expected negated float representation. It seems the compiler thinks the bit representation functions can be cancelled out without considering the negation modifier, i.e. that the following is occurring:

val_i = -( val_i );

The fix for this problem is to simply introduce a temporary:

float val_f = -( intBitsToFloat( val_i ) );
val_i = floatBitsToInt( val_f );

which then results in val_i containing the expected integer value of the negated float representation.

I’ve confirmed the behaviour with Nsight’s OGL shader debugging.

I’m not sure of this problem’s scope but it seems to occur when using the bit representation functions, scalar floats, (including from uniforms), and source argument modifiers, though of course the scope could be much wider. The problem doesn’t appear to occur on OSX, at least on:

OSX 10.9.5
NVIDIA GeForce GTX 780M OpenGL Engine
4.1 NVIDIA-8.26.28 310.40.55b01

I’ve added a repro case to the bottom of this post that shows the problem, (requires GLFW & GLEW), where the define FIX_OGL_COMPILER_PROBLEM can be used to control the problem.

System tested on:
Win7 Pro 64-bit (SP1), GeForce GTX 770, 344.65
GL_RENDERER: “GeForce GTX 770/PCIe/SSE2”
GL_VERSION: “4.4.0 NVIDIA 344.65”

Can anyone else reproduce this problem?
Thanks,
Andy Slater

Repro case:
nvidia_opengl_compiler_problem.c

//! uncomment to fix the OGL compiler problem
//#define FIX_OGL_COMPILER_PROBLEM

#if !( defined( __APPLE__ ) || defined( __linux__ ) )
	#include <windows.h>
#endif
#if !defined( __APPLE__ )
	#include <GL/glew.h>
#endif

#if( defined( __APPLE__ ) || defined( __linux__ ) )
	#define GLFW_INCLUDE_GLCOREARB
#endif

#include <GLFW/glfw3.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

#if( defined( __APPLE__ ) || defined( __linux__ ) )
	#define outputf printf
#else
	/*
	 * printf-style logging for the Windows build: formats the message into
	 * a bounded stack buffer and routes it to the debugger output window
	 * via OutputDebugString. (On OSX/Linux plain printf is used instead.)
	 *
	 * format: printf-style format string, followed by its arguments.
	 */
	void outputf( const char *format, ... )
	{
		char charbuf[ 2048 ];
		va_list	argp;

		va_start( argp, format );
		/* vsnprintf, not vsprintf: oversized messages are truncated
		   instead of overflowing the stack buffer. */
		vsnprintf( charbuf, sizeof( charbuf ), format, argp );
		va_end( argp );

		OutputDebugString( charbuf );
	}
#endif

void flushGlErrors( const int showGlErrors )
{
	GLenum err;
	while(( err = glGetError() ) != GL_NO_ERROR )
	{
		if( showGlErrors )
		{
			outputf( "GL_ERROR = 0x%08x\n", err );
		}
	}
}

/*
 * Repro case entry point: creates a GL context, compiles a fragment shader
 * that round-trips a negated float through floatBitsToInt/intBitsToFloat,
 * and draws a triangle so the result can be inspected (e.g. with Nsight).
 * Define FIX_OGL_COMPILER_PROBLEM to use the temporary-variable workaround.
 * Returns 0 on success, -1 if GLFW initialisation or window creation fails.
 */
int main(void)
{
	GLFWwindow* window;
	float points[] = {
		 0.0f,  1.0f,  0.0f,
		 1.0f, -1.0f,  0.0f,
		-1.0f, -1.0f,  0.0f,
	};
	GLuint vbo = 0;
	GLuint vao = 0;
	/* Pass-through vertex shader. */
	const char* vertex_shader =
		"#version 150\n"
		"in vec3 vp;\n"
		"void main() {\n"
		"  gl_Position = vec4(vp, 1.0);\n"
		"}\n";
	/* Fragment shader exercising the bit-encoding round trip; the broken
	   and workaround variants are selected at C compile time below. */
	const char* fragment_shader =
		"#version 150\n"
		"#extension GL_ARB_shader_bit_encoding : enable\n"
		"out vec4 frag_colour;\n"
		"void main() {\n"
		"  float val_f;\n"
		"  int   val_i;\n"
		"  val_f = ( gl_FragCoord.x / 640.0 ) * -1.0f;\n"
		"  val_i = floatBitsToInt( val_f );\n"
		#if !defined( FIX_OGL_COMPILER_PROBLEM )
			"  val_i = floatBitsToInt( ( intBitsToFloat( val_i ) ) * -1.0 );\n"
		#else
			"  val_f = -( intBitsToFloat( val_i ) );\n"
			"  val_i = floatBitsToInt( val_f );\n"
		#endif
		"  frag_colour = vec4( intBitsToFloat( val_i ), 0.0, 0.0, 1.0 );\n"
		"}";
	GLuint vs;
	GLuint fs;
	GLuint shader_programme;

	if( !glfwInit() )
	{
		return -1;
	}

	#if defined( __APPLE__ ) && defined( GLFW_INCLUDE_GLCOREARB ) 
		/* OSX only provides a core profile context for GL 3.2+. */
		glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
		glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
		glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
		glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
	#endif

	window = glfwCreateWindow(640, 480, "Hello World", NULL, NULL);
	if( !window )
	{
		glfwTerminate();
		return -1;
	}

	glfwMakeContextCurrent( window );

	#if !defined( __APPLE__ )
		/* GLEW resolves the GL entry points on Windows/Linux. */
		glewInit();
	#endif

	outputf( "Renderer: %s\n", glGetString( GL_RENDERER ) );
	outputf( "OpenGL version supported %s\n", glGetString( GL_VERSION ) );

	/* Clear any errors raised during context/GLEW setup. */
	flushGlErrors( 0 );

	/* Triangle vertex buffer + VAO. */
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glBufferData(GL_ARRAY_BUFFER, 9 * sizeof(float), points, GL_STATIC_DRAW);

	glGenVertexArrays(1, &vao);
	glBindVertexArray(vao);
	glEnableVertexAttribArray(0);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, NULL);

	// vertex shader
	vs = glCreateShader(GL_VERTEX_SHADER);
	glShaderSource(vs, 1, &vertex_shader, NULL);
	glCompileShader(vs);
	{
		GLint result = 0;
		glGetShaderiv( vs, GL_COMPILE_STATUS, &result );
		if( result != GL_TRUE )
		{
			outputf( "vertex shader compile failed\n" );
		}
	}

	// fragment shader
	fs = glCreateShader(GL_FRAGMENT_SHADER);
	glShaderSource(fs, 1, &fragment_shader, NULL);
	glCompileShader(fs);
	{
		GLint result = 0;
		glGetShaderiv( fs, GL_COMPILE_STATUS, &result );
		if( result != GL_TRUE )
		{
			outputf( "fragment shader compile failed\n" );
		}
	}

	shader_programme = glCreateProgram();
	glAttachShader(shader_programme, fs);
	glAttachShader(shader_programme, vs);
	glLinkProgram(shader_programme);
	{
		GLint result = 0;
		glGetProgramiv( shader_programme, GL_LINK_STATUS, &result );
		if( result != GL_TRUE )
		{
			outputf( "shader programme link failed\n" );
		}
	}
	/* The shader objects are no longer needed once linked. */
	glDeleteShader( fs );
	glDeleteShader( vs );
	flushGlErrors( 1 );


	glClearColor(0.0f, 0.0f, 0.5f, 0);

	while( !glfwWindowShouldClose( window ) )
	{
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
		glUseProgram(shader_programme);
		glBindVertexArray(vao);
		glDrawArrays(GL_TRIANGLES, 0, 3);

		glfwSwapBuffers(window);
		/* Without polling, window events (including the close button)
		   are never processed and this loop can never terminate. */
		glfwPollEvents();
	}

	/* Tidy up GL objects and the window before shutting GLFW down. */
	glDeleteProgram( shader_programme );
	glDeleteVertexArrays( 1, &vao );
	glDeleteBuffers( 1, &vbo );
	glfwDestroyWindow( window );
	glfwTerminate();
	return 0;
}

Could you please cross-check with the newest available drivers for the GTX770? 347.09 is available.
There had been a correction in the intBitsToFloats implementation around the release 346 drivers which might affect this. If that doesn’t help, I’ll let the OpenGL driver team know.

Thanks for the prompt reply.

I’ve just installed the 347.09 drivers and the problem persists.

Many thanks,
Andy Slater

System info:

NVIDIA System Information report created on: 01/12/2015 17:10:59
System name: ASLATER

[Display]
Operating System:	Windows 7 Professional, 64-bit (Service Pack 1)
DirectX version:	11.0 
GPU processor:		GeForce GTX 770
Driver version:		347.09
Direct3D API version:	11
Direct3D feature level:	11_0
CUDA Cores:		1536 
Core clock:		1045 MHz 
Memory data rate:	7010 MHz
Memory interface:	256-bit 
Memory bandwidth:	224.32 GB/s
Total available graphics memory:	4096 MB
Dedicated video memory:	2048 MB GDDR5
System video memory:	0 MB
Shared system memory:	2048 MB
Video BIOS version:	80.04.B4.00.01
IRQ:			32
Bus:			PCI Express x16 Gen2
Device ID:		10DE 1184 103310DE
Part Number:		2005 0000

[Components]

nvui.dll		8.17.13.4709		NVIDIA User Experience Driver Component
nvxdsync.exe		8.17.13.4709		NVIDIA User Experience Driver Component
nvxdplcy.dll		8.17.13.4709		NVIDIA User Experience Driver Component
nvxdbat.dll		8.17.13.4709		NVIDIA User Experience Driver Component
nvxdapix.dll		8.17.13.4709		NVIDIA User Experience Driver Component
NVCPL.DLL		8.17.13.4709		NVIDIA User Experience Driver Component
nvCplUIR.dll		8.1.740.0		NVIDIA Control Panel
nvCplUI.exe		8.1.740.0		NVIDIA Control Panel
nvWSSR.dll		6.14.13.4709		NVIDIA Workstation Server
nvWSS.dll		6.14.13.4709		NVIDIA Workstation Server
nvViTvSR.dll		6.14.13.4709		NVIDIA Video Server
nvViTvS.dll		6.14.13.4709		NVIDIA Video Server
NVSTVIEW.EXE		7.17.13.4709		NVIDIA 3D Vision Photo Viewer
NVSTTEST.EXE		7.17.13.4709		NVIDIA 3D Vision Test Application
NVSTRES.DLL		7.17.13.4709		NVIDIA 3D Vision Module
nvDispSR.dll		6.14.13.4709		NVIDIA Display Server
NVMCTRAY.DLL		8.17.13.4709		NVIDIA Media Center Library
nvDispS.dll		6.14.13.4709		NVIDIA Display Server
PhysX		09.14.0702		NVIDIA PhysX
NVCUDA.DLL		8.17.13.4709		NVIDIA CUDA 7.0.18 driver
nvGameSR.dll		6.14.13.4709		NVIDIA 3D Settings Server
nvGameS.dll		6.14.13.4709		NVIDIA 3D Settings Server

Ok, thanks for testing. I’ll forward your report to the compiler team.

FYI, the defect has been fixed inside the compiler internally and will be merged to release drivers in the future, though I can’t say which one will contain it first at this time.

Many thanks for letting me know this issue has been resolved.

Is there any chance you could get details of the fix?
Many thanks,
Andy