Hello, after dozens of hours of extensive testing, I believe I have found a bug in NVIDIA GeForce drivers.
(Although it might seem to be a bug in Windows or DirectX, it is not, because their D3D_DRIVER_TYPE_WARP CPU emulation device does not show this issue.)
In a 64-bit Windows 10 Desktop program written in C++, I have created DXGI swapchain code that never blocks the calling thread.
IDXGISwapChain4->Present1 (0,0);
The above call must never block the calling thread. It works (meaning it does not block and there is no screen tearing), but only if I use NVIDIA GeForce driver version 460.89 or earlier OR if I use D3D_DRIVER_TYPE_WARP.
In other words, if I use driver 461.09 or later together with the non-emulated device D3D_DRIVER_TYPE_HARDWARE, the call blocks for ~14 ms on each and every call.
I even tried with the DXGI_PRESENT_RESTART flag – no change.
I also tried the DXGI_PRESENT_DO_NOT_WAIT flag, which did cause the Present1(0, DXGI_PRESENT_DO_NOT_WAIT ) call to become non-blocking, but then it kept returning DXGI_ERROR_WAS_STILL_DRAWING for ~14 ms, effectively blocking the thread again.
This is not a Windows Store app, not a UWP, not a WPF app - it is just a classic 64-bit Desktop app.
Simplified paraphrased repro steps (cannot post a verbatim copy of our project files due to legal reasons, sorry). Also I’ve deleted all error/exception handling for clarity, etc.
#include <d2d1_3.h>
#include <d2d1_3helper.h>
#include <d3d11_4.h>
#include <dxgi1_6.h>
#include <dwrite_3.h>
#ifdef _DEBUG
# include <dxgidebug.h>
# include <d3d11sdklayers.h>
#endif
#include <d2derr.h>
#include <winerror.h>
#include <wincodec.h>
#include <wrl/client.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <windows.h>
#include <windowsx.h>
#include <objbase.h>
// Direct3D feature levels handed to D3D11CreateDevice, listed from the
// highest (11.1) down to the lowest (9.1).
constexpr D3D_FEATURE_LEVEL DX_FEATR_LVLS[DX_NBR_D3D_FEATURE_LEVELS] =
{
    // Direct3D 11
    D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0,
    // Direct3D 10
    D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0,
    // Direct3D 9
    D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1
};
// COM smart pointers holding the DXGI / Direct3D 11 / Direct2D / DirectWrite / WIC
// objects used by the repro. The declaration order is left untouched.

// DXGI factory created with CreateDXGIFactory2; used to create the swap chain.
Microsoft::WRL::ComPtr<IDXGIFactory7> m_dxgiFactory7;
// Factory obtained from the adapter via GetParent; used for MakeWindowAssociation.
Microsoft::WRL::ComPtr<IDXGIFactory7> m_dxgiParentFactory7;
// Adapter obtained from the DXGI device via GetParent.
Microsoft::WRL::ComPtr<IDXGIAdapter> m_dxgiParentAdapter;
// Swap chain as returned by CreateSwapChainForHwnd.
Microsoft::WRL::ComPtr<IDXGISwapChain1> m_dxgiSwapchain;
// Same swap chain queried as IDXGISwapChain4; this is the interface Present1 is called on.
Microsoft::WRL::ComPtr<IDXGISwapChain4> m_dxgiSwapchain4;
// ID3D11Device5 view of the device, queried from m_d3dDevice0.
Microsoft::WRL::ComPtr<ID3D11Device5> m_d3dDevice5;
// Base device interface filled in by D3D11CreateDevice.
Microsoft::WRL::ComPtr<ID3D11Device> m_d3dDevice0;
// DXGI view of the Direct3D device; passed to D2D1CreateDevice.
Microsoft::WRL::ComPtr<IDXGIDevice4> m_dxgiDevice4;
// Direct2D device created on top of m_dxgiDevice4.
Microsoft::WRL::ComPtr<ID2D1Device6> m_d2dDevice6;
// Surface of swap chain buffer 0 (obtained with GetBuffer).
Microsoft::WRL::ComPtr<IDXGISurface2> m_dxgiBackbufferSurface;
// Direct2D bitmap wrapping m_dxgiBackbufferSurface; set as the D2D render target.
Microsoft::WRL::ComPtr<ID2D1Bitmap1> m_d2dBackbufferBitmap;
// DirectWrite factory (its creation is not part of the shown snippet).
Microsoft::WRL::ComPtr<IDWriteFactory7> m_dWriteFactory;
// WIC imaging factory created with CoCreateInstance.
Microsoft::WRL::ComPtr<IWICImagingFactory2> m_wicFactory;
// ID3D11DeviceContext4 view of the immediate context, obtained from m_d3dDeviceCtx0.
Microsoft::WRL::ComPtr<ID3D11DeviceContext4> m_d3dDeviceCtx4;
// Immediate context filled in by D3D11CreateDevice.
Microsoft::WRL::ComPtr<ID3D11DeviceContext> m_d3dDeviceCtx0;
// Direct2D device context used for all drawing.
Microsoft::WRL::ComPtr<ID2D1DeviceContext6> m_d2dDeviceCtx6;
// ---------- DXGI factory, WIC factory, Direct3D 11 device and Direct2D device/context
// (All HRESULT / error handling has been stripped from this repro for clarity.)

// DXGI factory that is later used to create the swap chain.
// Fixed: the MSVC operator is spelled __uuidof (two leading underscores); the
// original "_uuidof" is an undeclared identifier.
CreateDXGIFactory2 (0, __uuidof (IDXGIFactory7),
                    (void **) m_dxgiFactory7.GetAddressOf ());

// WIC imaging factory.
CoCreateInstance (
    CLSID_WICImagingFactory2,
    nullptr,
    CLSCTX_INPROC_SERVER,
    IID_PPV_ARGS (&m_wicFactory));

// Direct3D 11 device on the default adapter. BGRA support is requested because
// the device is shared with Direct2D below.
D3D11CreateDevice (nullptr,
                   warpFlag ? D3D_DRIVER_TYPE_WARP : D3D_DRIVER_TYPE_HARDWARE, // The issue has been verified to occur only with D3D_DRIVER_TYPE_HARDWARE
                   0,
                   D3D11_CREATE_DEVICE_BGRA_SUPPORT,
                   DX_FEATR_LVLS,
                   DX_NBR_D3D_FEATURE_LEVELS,
                   D3D11_SDK_VERSION,
                   m_d3dDevice0.GetAddressOf (),
                   nullptr,
                   m_d3dDeviceCtx0.GetAddressOf ());

// Obtain the newer interface versions of the device and its immediate context.
m_d3dDevice0->QueryInterface (__uuidof (ID3D11Device5), (void **) m_d3dDevice5.GetAddressOf ());
m_d3dDeviceCtx0.As (&m_d3dDeviceCtx4);

// Walk device -> DXGI device -> adapter -> factory.
m_d3dDevice5->QueryInterface (__uuidof(IDXGIDevice4), (void **) m_dxgiDevice4.GetAddressOf ());
m_dxgiDevice4->GetParent (__uuidof(IDXGIAdapter), (void **) &m_dxgiParentAdapter);
m_dxgiParentAdapter->GetParent (__uuidof(IDXGIFactory7), (void **) &m_dxgiParentFactory7);

// Direct2D device and device context layered on the DXGI device.
D2D1CreateDevice (m_dxgiDevice4.Get (),
                  D2D1::CreationProperties (
                      D2D1_THREADING_MODE_SINGLE_THREADED,
                      D2D1_DEVICE_CONTEXT_OPTIONS_NONE),
                  (ID2D1Device **) m_d2dDevice6.GetAddressOf ());
m_d2dDevice6->CreateDeviceContext (D2D1_DEVICE_CONTEXT_OPTIONS_NONE, m_d2dDeviceCtx6.GetAddressOf ());

// Draw in pixel units with an identity transform.
m_d2dDeviceCtx6->SetUnitMode (D2D1_UNIT_MODE_PIXELS);
m_d2dDeviceCtx6->SetTransform (D2D1::Matrix3x2F::Identity ());
// ---------- Swap chain creation and Direct2D back-buffer target
//
// Fixed: the descriptor is now fully populated BEFORE it is handed to
// CreateSwapChainForHwnd. Previously it was zeroed, passed to the factory, and
// only then filled in, so the swap chain was created from an all-zero
// descriptor (BufferCount 0, format 0, ...).
DXGI_SWAP_CHAIN_DESC1 dxgiSwapchainDsc;
ZERO_MEM (dxgiSwapchainDsc);
dxgiSwapchainDsc.Width = canvasWidth;
dxgiSwapchainDsc.Height = canvasHeight;
dxgiSwapchainDsc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
dxgiSwapchainDsc.Stereo = false;
dxgiSwapchainDsc.SampleDesc.Count = 1;
dxgiSwapchainDsc.SampleDesc.Quality = 0;
dxgiSwapchainDsc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_BACK_BUFFER;
dxgiSwapchainDsc.BufferCount = 2;
dxgiSwapchainDsc.Scaling = DXGI_SCALING_NONE;
dxgiSwapchainDsc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
dxgiSwapchainDsc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
dxgiSwapchainDsc.Flags = (0
#if 0 // Tried this too (with GetFrameLatencyWaitableObject)-- didn't solve the issue
    | DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT
#endif
    );

// Create the swap chain for the main window and obtain its IDXGISwapChain4 interface.
m_dxgiFactory7->CreateSwapChainForHwnd (m_d3dDevice5.Get (), m_mainWindowHandle, &dxgiSwapchainDsc, nullptr, nullptr, m_dxgiSwapchain.GetAddressOf ());
m_dxgiSwapchain.As (&m_dxgiSwapchain4);

// NOTE(review): the swap chain is created through m_dxgiFactory7, while the
// window association is made through m_dxgiParentFactory7 - confirm that this
// is the factory DXGI expects for this call.
m_dxgiParentFactory7->MakeWindowAssociation (m_mainWindowHandle, DXGI_MWA_NO_WINDOW_CHANGES);

// Wrap swap chain buffer 0 in a Direct2D bitmap that can be used as a render target.
m_dxgiSwapchain4->GetBuffer (0, __uuidof(IDXGISurface2), (void **) m_dxgiBackbufferSurface.GetAddressOf ());
m_d2dDeviceCtx6->CreateBitmapFromDxgiSurface (
    m_dxgiBackbufferSurface.Get (),
    D2D1::BitmapProperties1 (D2D1_BITMAP_OPTIONS_TARGET | D2D1_BITMAP_OPTIONS_CANNOT_DRAW, D2D1::PixelFormat (DXGI_FORMAT_B8G8R8A8_UNORM, DX_RENDER_TARGET_ALPHA_MODE), 0, 0),
    m_d2dBackbufferBitmap.GetAddressOf ());

// Zeroed present parameters; passed to Present1 in the redraw loop.
DXGI_PRESENT_PARAMETERS dxgiPresentParams;
ZERO_MEM (dxgiPresentParams);
// Using WIC to import PNGs. Also calling m_d2dDeviceCtx6->CreateBitmap to import other picture file formats not supported by WIC.
// UI redraw loop start
// ...
// Direct all Direct2D drawing of this frame at the swap chain's back-buffer bitmap.
m_d2dDeviceCtx6->SetTarget (m_d2dBackbufferBitmap.Get ());
// Calling D2D to draw bitmaps and geometric primitives, and also DirectWrite APIs (no 3D stuff).
// Using GetTarget and SetTarget for off-screen scene bitmap rendering.
// ...
// Now this call should never block, but it _does_ block as described at the beginning of my post.
// Fixed: the stray ')' after the second argument made this line a syntax error;
// Present1 takes (SyncInterval, PresentFlags, pPresentParameters).
m_dxgiSwapchain4->Present1 (0, 0, &dxgiPresentParams);
// I even tried with the DXGI_PRESENT_RESTART flag – no change. I also tried the DXGI_PRESENT_DO_NOT_WAIT flag, which _did_ cause the Present1(0, DXGI_PRESENT_DO_NOT_WAIT) call to
// become non-blocking, but then it kept returning DXGI_ERROR_WAS_STILL_DRAWING for ~14 ms (in a local sub-loop with a timeout of 1000 ms), effectively blocking the thread again.
// UI redraw loop end
// ...
// ---------- Relevant portions of WinMain showing how the swapchain-tied window is created
// Program entry point (abridged): initializes COM, raises the process priority
// class, registers the window class and creates the top-level window that the
// swap chain is bound to. Sections marked "// ..." are omitted from the repro.
//
// hInst     - instance handle; used for the window class and the window.
// hPrevInst - not used in the portion shown.
// lpCmdL    - not used in the portion shown.
// nCmdS     - not used in the portion shown.
int CALLBACK WinMain (HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR lpCmdL, int nCmdS)
{
    CoInitialize (nullptr);
    SetPriorityClass (GetCurrentProcess (), HIGH_PRIORITY_CLASS);
    // ...
    WNDCLASSEX wc;
    m_WinMenu = CreateMenu ();
    wc.cbSize = sizeof (wc);
    wc.style = CS_HREDRAW | CS_VREDRAW;
    wc.lpfnWndProc = m_winProc;
    wc.cbClsExtra = 0;
    wc.cbWndExtra = 0;
    // Fixed: the WNDCLASSEX member is named hInstance; "hInst" is not a member.
    wc.hInstance = hInst;
    wc.hIcon = LoadIcon (nullptr, IDI_APPLICATION);
    wc.hCursor = nullptr;
    // No class background brush is set.
    wc.hbrBackground = nullptr;
    wc.lpszMenuName = m_WinClassName;
    wc.lpszClassName = m_WinClassName;
    wc.hIconSm = LoadIcon (nullptr, IDI_APPLICATION);
    RegisterClassEx (&wc);

    // Overlapped window without a maximize box, created at (0,0) with a 10x10 size.
    m_mainWindowHandle = CreateWindowEx (WS_EX_APPWINDOW,
                                         m_WinClassName,
                                         m_WinTitle,
                                         (WS_OVERLAPPEDWINDOW & ~WS_MAXIMIZEBOX),
                                         0, 0,
                                         10, 10,
                                         nullptr,
                                         nullptr,
                                         hInst,
                                         nullptr);
    // ...
}
Observed while running:
- NVIDIA RTX 2070 with the drivers listed below.
- Two-monitor setup, both with the same resolution (1920x1200).
- Windows 10 Pro x64, Version 21H2, OS build 19044.2364, Experience: Windows Feature Experience Pack 120.2212.4190.0
GeForce driver settings:
In NVIDIA Control Panel, our program does not have any per-program settings, and I have double-checked that this relevant global setting is set to “Use the 3D application setting”: ‘3D Settings’ > ‘Manage 3D settings’ > ‘Vertical sync’.
If I set that setting to ‘Off’ with the latest driver, I get the correct non-blocking behavior, but this introduces unwanted screen tearing. If I set it to ‘On’ with the latest driver, I get blocking behavior but with no screen tearing, which is unwanted as well. To repeat: we can achieve the desired state (non-blocking behavior without any screen tearing) either by using GeForce driver 460.89 or earlier OR by using D3D_DRIVER_TYPE_WARP.
Tested NVIDIA GeForce drivers:
These work correctly (non-blocking behavior):
457.51-desktop-win10-64bit-international-whql
460.89-desktop-win10-64bit-international-nsd-dch-whql
These do NOT work correctly (blocking behavior):
461.09-desktop-win10-64bit-international-dch-whql
461.40-desktop-win10-64bit-international-nsd-dch-whql.exe
461.92-desktop-win10-64bit-international-nsd-whql
472.12-desktop-win10-win11-64bit-international-whql
472.84-desktop-win10-win11-64bit-international-nsd-whql
522.30-desktop-win10-win11-64bit-international-nsd-dch-whql
526.47-desktop-win10-win11-64bit-international-dch-whql
526.98-desktop-win10-win11-64bit-international-dch-whql
526.98-desktop-win10-win11-64bit-international-nsd-dch-whql
531.61-desktop-win10-win11-64bit-international-nsd-dch-whql
Program features:
- Built using VS Community 2022 with latest updates applied.
- All drawing, rendering, DX-presenting, and custom-UI management are done from a single thread in a fully synchronous manner. There are no race conditions, no callbacks can ever initiate any drawing, and everything is thread-safe. However, just to confirm this, I also tested with a mutex locked at the beginning of every graphics-related call (it changed nothing – the problem was still there).
Some potentially relevant preprocessor definitions in VS 2022 project settings:
X64_BUILD
_WINDOWS
NTDDI_VERSION=NTDDI_WIN10_RS4
_WIN32_WINNT=_WIN32_WINNT_WIN10
WINVER=_WIN32_WINNT_WIN10
DIRECTINPUT_VERSION=0x0800
_CRT_SECURE_NO_WARNINGS
_ALLOW_RTCc_IN_STL
Potentially relevant linker inputs:
d3d11.lib
d2d1.lib
dxgi.lib
dwrite.lib
dsound.lib
dinput8.lib
dxguid.lib
odbc32.lib
odbccp32.lib
winmm.lib
kernel32.lib
user32.lib
avrt.lib
PowrProf.lib
I used the DX debug layer to see whether any issues are reported (there were indeed a couple at the beginning of development, but I have since fixed them all). Very briefly, I used:
DXGI_CREATE_FACTORY_DEBUG
D3D11_CREATE_DEVICE_DEBUG
D2D1_DEBUG_LEVEL_INFORMATION
Thank you for looking into this.