Pipeline creation hang with simple shader code

Using a small number of matrix operations (especially involving inverse()) on constants causes vkCreateGraphicsPipelines() or vkCreateComputePipelines() to either hang for a long time, produce an exception, or even cause the entire system to hang and eventually crash the driver. The length of the hang depends on the number and types of those operations in the shader, and during the hang the memory usage increases quickly to multiple gigabytes. If the call eventually succeeds and I then create the same pipeline with the same shader code again later, even after restarting the application, the call succeeds immediately. This, together with the fact that it only happens when these operations are done on constants rather than dynamic variables, and especially when using inverse() on mat4 types (which is obviously non-trivial), gives me the impression that some internal cached shader compilation is unable to handle the optimization of these parts.

I can reliably reproduce it with only two lines of shader code:

mat4 m = mat4(1.5);
vec4 v = vec4(0.5) * inverse(inverse(inverse(m) * m) * m);

Of course the result has to be used somehow afterwards to avoid the code simply being optimized away completely.

I understand this can (and, in a real application, should) be avoided by simply calculating the result manually and pasting it into the shader, but I don’t think this is expected behaviour in any case. It certainly makes testing certain things where performance is not a concern quite a nuisance.

A minimal test application that exhibits this behaviour can be downloaded here: 16.6 KB file on MEGA

I’m using Windows 10 x64 with a GTX 1080 Ti and studio driver version 512.15.

Hi @rasterizer, yes, I can reproduce the bug on an NVIDIA GT 1650. Also, after waiting for some time, I get a crash.

Please update your simple project to support multiple GPUs; the GPU index can be set using a command-line argument:

#pragma comment(lib, "vulkan-1.lib")

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>

#define VK_USE_PLATFORM_WIN32_KHR
#include <vulkan/vulkan.h>

// Terminates the program when `x` evaluates to false: prints the failing source
// line, waits for a key press so the console window stays readable, then exits
// with status 1. The do/while(false) wrapper makes the macro expand to a single
// statement, so `CHECK(x);` is safe inside an unbraced if/else.
#define CHECK(x) do { if (!(x)) { std::cout << "Check failed at line " << __LINE__ << ".\r\n"; std::cin.get(); std::exit(1); } } while (false)
// Treats any non-negative result value as acceptable and any negative value as
// a failure that goes through CHECK.
#define VULKAN_CHECK(x) CHECK((x) >= 0)

// Debug report callback: writes the reported message text to standard output,
// followed by a CR/LF pair. All other arguments are ignored. Always returns
// VK_FALSE.
VKAPI_ATTR VkBool32 VKAPI_CALL DebugReport(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, std::uint64_t obj, std::size_t location, std::int32_t code, const char* layerPrefix, const char* msg, void* userData)
{
	std::ostream& sink = std::cout;

	sink << msg;
	sink << "\r\n";

	return VK_FALSE;
}

// Looks up "vkCreateDebugReportCallbackEXT" through vkGetInstanceProcAddr and
// forwards all arguments to it. Returns VK_ERROR_EXTENSION_NOT_PRESENT when the
// lookup yields a null pointer; otherwise returns whatever the looked-up
// function returns.
VkResult CreateDebugReportCallbackEXT(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback)
{
	const auto rawEntryPoint = vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT");
	const auto createCallback = reinterpret_cast<PFN_vkCreateDebugReportCallbackEXT>(rawEntryPoint);

	return createCallback
		? createCallback(instance, pCreateInfo, pAllocator, pCallback)
		: VK_ERROR_EXTENSION_NOT_PRESENT;
}

int main(int argc, char* argv[])
{
	std::cout << "Initializing...\r\n";

	const char* extensions[] = { VK_EXT_DEBUG_REPORT_EXTENSION_NAME };
	const char* layers[] = { "VK_LAYER_KHRONOS_validation" };
	VkDebugReportFlagsEXT debugReportFlags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT;

	// Instance

	VkInstanceCreateInfo instanceInfo = {};
	instanceInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
	instanceInfo.pApplicationInfo = nullptr;
	instanceInfo.enabledExtensionCount = static_cast<std::uint32_t>(sizeof(extensions) / sizeof(extensions[0]));
	instanceInfo.ppEnabledExtensionNames = extensions;
	instanceInfo.enabledLayerCount = static_cast<std::uint32_t>(sizeof(layers) / sizeof(layers[0]));
	instanceInfo.ppEnabledLayerNames = layers;

	VkInstance instance;

	VULKAN_CHECK(vkCreateInstance(&instanceInfo, nullptr, &instance));

	// Debug report callback

	VkDebugReportCallbackCreateInfoEXT debugCallbackInfo = {};
	debugCallbackInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
	debugCallbackInfo.flags = debugReportFlags;
	debugCallbackInfo.pfnCallback = DebugReport;

	VkDebugReportCallbackEXT debugReportCallback;

	VULKAN_CHECK(CreateDebugReportCallbackEXT(instance, &debugCallbackInfo, nullptr, &debugReportCallback));

	// Physical device

	std::uint32_t physicalDeviceCount = 0;
	VkPhysicalDevice physicalDevices[32];

	//VULKAN_CHECK(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, &physicalDevice));
	VULKAN_CHECK(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, nullptr));
	VULKAN_CHECK(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, physicalDevices));

	// Queue

	std::uint32_t computeQueueFamilyIndex = -1;

	std::uint32_t queueFamilyCount = 0;
	uint32_t index = 0;
	if (argc > 1) {
		index = atoi(argv[1]);
		if (index > physicalDeviceCount) {
			index = 0;
		}
	}
	VkPhysicalDevice physicalDevice = physicalDevices[index];
	vkGetPhysicalDeviceQueueFamilyProperties(physicalDevices[index], &queueFamilyCount, nullptr);

	std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
	vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilies.data());

	for (std::uint32_t queueFamilyIndex = 0; queueFamilyIndex < queueFamilyCount; ++queueFamilyIndex)
	{
		VkQueueFamilyProperties& queueFamily = queueFamilies[queueFamilyIndex];

		if (queueFamily.queueCount == 0)
			continue;

		if (!(queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT))
			continue;

		computeQueueFamilyIndex = queueFamilyIndex;
		break;
	}

	CHECK(computeQueueFamilyIndex != -1);

	// Device

	float computeQueuePriority = 1.0f;

	VkDeviceQueueCreateInfo queueInfo = {};
	queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	queueInfo.queueFamilyIndex = computeQueueFamilyIndex;
	queueInfo.queueCount = 1;
	queueInfo.pQueuePriorities = &computeQueuePriority;

	VkDeviceCreateInfo deviceInfo = {};
	deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	deviceInfo.pQueueCreateInfos = &queueInfo;
	deviceInfo.queueCreateInfoCount = 1;
	deviceInfo.pEnabledFeatures = nullptr;
	deviceInfo.enabledExtensionCount = 0;
	deviceInfo.ppEnabledExtensionNames = nullptr;

	VkDevice device;

	VULKAN_CHECK(vkCreateDevice(physicalDevice, &deviceInfo, nullptr, &device));

	// Queue handle

	VkQueue computeQueue;

	vkGetDeviceQueue(device, computeQueueFamilyIndex, 0, &computeQueue);

	// Shader module

	std::vector<std::uint32_t> shaderCode;
	std::ifstream shaderCodeStream("Shader.spv", std::ios::binary);

	CHECK(shaderCodeStream);

	for (std::uint32_t i; !shaderCodeStream.read(reinterpret_cast<char*>(&i), 4).eof(); shaderCode.push_back(i));

	VkShaderModuleCreateInfo shaderModuleInfo = {};
	shaderModuleInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
	shaderModuleInfo.codeSize = shaderCode.size() * 4;
	shaderModuleInfo.pCode = shaderCode.data();

	VkShaderModule shaderModule;

	VULKAN_CHECK(vkCreateShaderModule(device, &shaderModuleInfo, nullptr, &shaderModule));

	// Descriptor set layout

	VkDescriptorSetLayoutBinding descriptorSetLayoutBinding = {};
	descriptorSetLayoutBinding.binding = 0;
	descriptorSetLayoutBinding.descriptorCount = 1;
	descriptorSetLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
	descriptorSetLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;

	VkDescriptorSetLayoutCreateInfo descriptorSetLayoutInfo = {};
	descriptorSetLayoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
	descriptorSetLayoutInfo.bindingCount = 1;
	descriptorSetLayoutInfo.pBindings = &descriptorSetLayoutBinding;

	VkDescriptorSetLayout descriptorSetLayout;

	VULKAN_CHECK(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutInfo, nullptr, &descriptorSetLayout));

	// Pipeline layout

	VkPipelineLayoutCreateInfo pipelineLayoutInfo = {};
	pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	pipelineLayoutInfo.setLayoutCount = 1;
	pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout;
	pipelineLayoutInfo.pushConstantRangeCount = 0;
	pipelineLayoutInfo.pPushConstantRanges = nullptr;

	VkPipelineLayout pipelineLayout;

	VULKAN_CHECK(vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr, &pipelineLayout));

	// Pipeline

	VkPipelineShaderStageCreateInfo shaderStage = {};
	shaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	shaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
	shaderStage.module = shaderModule;
	shaderStage.pName = "main";

	VkComputePipelineCreateInfo pipelineInfo = {};
	pipelineInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
	pipelineInfo.stage = shaderStage;
	pipelineInfo.layout = pipelineLayout;

	VkPipeline pipeline;

	std::cout << "Creating pipeline...\r\n";

	VULKAN_CHECK(vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &pipeline));

	std::cout << "Done.\r\n";
	std::cin.get();

	return 0;
}

AMD RX 560 - no problem.

Hi @rasterizer,
Thanks for reporting this issue. We are able to reproduce the memory blowup/hang and there is an internal tracker to involve the appropriate components responsible for resolving the issue. Tracker is also tagged with this post in case there are any updates to be posted.

1 Like