560 release feedback & discussion

hi @abchauhan . Any update on this? Was nvidia team able to repro?

560 (with Gnome + Wayland) has broken screen streaming / recording for me in OBS. The “Screen Capture (PipeWire)” source shows up 100% black, except for the mouse cursor, after giving it permission (via the Gnome modal window that pops up) to share my entire desktop - and alternately, with specific windows marked ok to share. At no point do any pixels from my desktop appear in OBS.
This definitely worked correctly with 550. I don’t think I tried it with 555, so I can’t say how recently this regression began.

I’m no longer seeing this issue with the 560.35.03 drivers.

After updating to 560.35.03 I started seeing wezterm crashes constantly when running the WebGpu (Rust’s wgpu) backend with this backtrace:

Thread 1 "wezterm-gui" received signal SIGSEGV, Segmentation fault.
0x00007fffedc10857 in ?? () from /usr/lib/libnvidia-glcore.so.560.35.03
(gdb) bt
#0  0x00007fffedc10857 in ?? () from /usr/lib/libnvidia-glcore.so.560.35.03
#1  0x00007fffee0279e9 in ?? () from /usr/lib/libnvidia-glcore.so.560.35.03
#2  0x00007fffee00d380 in ?? () from /usr/lib/libnvidia-glcore.so.560.35.03
#3  0x00007fffedf4b78e in ?? () from /usr/lib/libnvidia-glcore.so.560.35.03
#4  0x00007fffd6de3714 in DispatchWaitForFences (device=0x55555f6093a0, fenceCount=1, pFences=0x7fffffff5ab0, waitAll=1, timeout=18446744073709551615)
    at /usr/src/debug/vulkan-validation-layers/Vulkan-ValidationLayers-vulkan-sdk-1.3.275/layers/vulkan/generated/layer_chassis_dispatch.cpp:799
#5  0x00007fffd6cb8c63 in vulkan_layer_chassis::WaitForFences (device=0x55555f6093a0, fenceCount=1, pFences=0x7fffffff5ab0, waitAll=1, timeout=18446744073709551615)
    at /usr/src/debug/vulkan-validation-layers/Vulkan-ValidationLayers-vulkan-sdk-1.3.275/layers/vulkan/generated/chassis.cpp:1824
#6  0x0000555556eaa784 in ash::device::Device::wait_for_fences (self=0x55555f5e9148, fences=..., wait_all=true, timeout=18446744073709551615)
    at /home/lukasz/.cargo/registry/src/index.crates.io-6f17d22bba15001f/ash-0.37.3+1.3.251/src/device.rs:2399
#7  0x0000555556f378e4 in wgpu_hal::vulkan::instance::{impl#5}::acquire_texture (self=0x55555f3f03e8, timeout=...) at src/vulkan/instance.rs:911
#8  0x0000555556cd5745 in wgpu_core::global::Global<wgpu_core::identity::IdentityManagerFactory>::surface_get_current_texture<wgpu_core::identity::IdentityManagerFactory, wgpu_hal::vulkan::Api> (self=0x55555f1a41a0, surface_id=..., texture_id_in=()) at /home/lukasz/.cargo/registry/src/index.crates.io-6f17d22bba15001f/wgpu-core-0.18.1/src/present.rs:162
#9  0x0000555556a115ac in wgpu::backend::direct::{impl#7}::surface_get_current_texture (self=0x55555f1a41a0, surface=0x7fffffff7468, surface_data=0x55555f1bae50)
    at src/backend/direct.rs:799
#10 0x0000555556a2ad86 in wgpu::context::{impl#5}::surface_get_current_texture<wgpu::backend::direct::Context> (self=0x55555f1a41a0, surface=0x55555ccf01e8, surface_data=...)
    at src/context.rs:2212
#11 0x0000555556a38ffd in wgpu::Surface::get_current_texture (self=0x55555ccf01c8) at src/lib.rs:4948
#12 0x00005555563b4744 in wezterm_gui::termwindow::TermWindow::call_draw_webgpu (self=0x55555d0ee188) at wezterm-gui/src/termwindow/render/draw.rs:26
#13 0x00005555563b45e8 in wezterm_gui::termwindow::TermWindow::call_draw (self=0x55555d0ee188, frame=0x7fffffff96a8) at wezterm-gui/src/termwindow/render/draw.rs:16
#14 0x00005555563bb900 in wezterm_gui::termwindow::TermWindow::paint_impl (self=0x55555d0ee188, frame=0x7fffffff96a8) at wezterm-gui/src/termwindow/render/paint.rs:108
#15 0x00005555563de0e1 in wezterm_gui::termwindow::TermWindow::do_paint_webgpu_impl (self=0x55555d0ee188) at wezterm-gui/src/termwindow/mod.rs:1100
#16 0x00005555563ddee8 in wezterm_gui::termwindow::TermWindow::do_paint_webgpu (self=0x55555d0ee188) at wezterm-gui/src/termwindow/mod.rs:1084
#17 0x00005555563dc36d in wezterm_gui::termwindow::TermWindow::dispatch_window_event (self=0x55555d0ee188, event=..., window=0x55555d0f0548) at wezterm-gui/src/termwindow/mod.rs:1002
#18 0x0000555556097665 in wezterm_gui::termwindow::{impl#4}::new_window::{async_fn#0}::{closure#7} (event=..., window=0x55555d0f0548) at wezterm-gui/src/termwindow/mod.rs:823
#19 0x000055555828db92 in alloc::boxed::{impl#49}::call_mut<(window::WindowEvent, &window::os::x_and_wayland::Window), dyn core::ops::function::FnMut<(window::WindowEvent, &window::os::x_and_wayland::Window), Output=()>, alloc::alloc::Global> (self=0x55555d0f0558, args=...) at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/boxed.rs:2070
#20 0x0000555558178c93 in window::WindowEventSender::dispatch (self=0x55555d0f0548, event=...) at window/src/lib.rs:241
#21 0x00005555581add6f in window::os::wayland::window::WaylandWindowInner::do_paint (self=0x55555d0f04f8) at window/src/os/wayland/window.rs:1056
#22 0x00005555581ade6b in window::os::wayland::window::WaylandWindowInner::next_frame_is_ready (self=0x55555d0f04f8) at window/src/os/wayland/window.rs:1071
#23 0x0000555558158b3e in window::os::wayland::window::{impl#4}::frame::{closure#0} (inner=0x55555d0f04f8) at window/src/os/wayland/window.rs:1303
#24 0x00005555581db6c0 in window::os::wayland::connection::{impl#0}::with_window_inner::{async_block#0}<(), window::os::wayland::window::{impl#4}::frame::{closure_env#0}> ()
    at window/src/os/wayland/connection.rs:151
#25 0x00005555583e4060 in async_task::raw::RawTask<window::os::wayland::connection::{impl#0}::with_window_inner::{async_block_env#0}<(), window::os::wayland::window::{impl#4}::frame::{closure_env#0}>, (), promise::spawn::spawn_into_main_thread::{closure_env#0}<window::os::wayland::connection::{impl#0}::with_window_inner::{async_block_env#0}<(), window::os::wayland::window::{impl#4}::frame::{closure_env#0}>, ()>, ()>::run<window::os::wayland::connection::{impl#0}::with_window_inner::{async_block_env#0}<(), window::os::wayland::window::{impl#4}::frame::{closure_env#0}>, (), promise::spawn::spawn_into_main_thread::{closure_env#0}<window::os::wayland::connection::{impl#0}::with_window_inner::{async_block_env#0}<(), window::os::wayland::window::{impl#4}::frame::{closure_env#0}>, ()>, ()> (ptr=0x55555fe659f0) at /home/lukasz/.cargo/registry/src/index.crates.io-6f17d22bba15001f/async-task-4.7.1/src/raw.rs:557
#26 0x0000555559f234cf in async_task::runnable::Runnable<()>::run<()> (self=...) at /home/lukasz/.cargo/registry/src/index.crates.io-6f17d22bba15001f/async-task-4.7.1/src/runnable.rs:781
#27 0x00005555581905ab in window::spawn::schedule_with_pri::{closure#0} () at window/src/spawn.rs:40
#28 0x00005555582c2db1 in core::ops::function::FnOnce::call_once<window::spawn::schedule_with_pri::{closure_env#0}, ()> ()
    at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/core/src/ops/function.rs:250
#29 0x00005555593a52db in alloc::boxed::{impl#48}::call_once<(), (dyn core::ops::function::FnOnce<(), Output=()> + core::marker::Send), alloc::alloc::Global> (self=..., args=())
    at /rustc/3f5fd8dd41153bc5fdca9427e9e05be2c767ba23/library/alloc/src/boxed.rs:2063
#30 0x0000555558181365 in window::spawn::SpawnQueue::run_impl (self=0x55555cdaa0b0) at window/src/spawn.rs:169
#31 0x000055555817fb2a in window::spawn::SpawnQueue::run (self=0x55555cdaa0b0) at window/src/spawn.rs:63
#32 0x00005555581c1698 in window::os::wayland::connection::WaylandConnection::run_message_loop_impl (self=0x55555cc8dee0) at window/src/os/wayland/connection.rs:78
#33 0x00005555581c25e3 in window::os::wayland::connection::{impl#1}::run_message_loop (self=0x55555cc8dee0) at window/src/os/wayland/connection.rs:171
#34 0x00005555582ecf18 in window::os::x_and_wayland::{impl#1}::run_message_loop (self=0x55555cc00330) at window/src/os/x_and_wayland.rs:150
#35 0x0000555555ff3979 in wezterm_gui::frontend::GuiFrontEnd::run_forever (self=0x55555cdc1720) at wezterm-gui/src/frontend.rs:324
#36 0x00005555560fa263 in wezterm_gui::run_terminal_gui (opts=..., default_domain_name=...) at wezterm-gui/src/main.rs:790
#37 0x0000555556103734 in wezterm_gui::run () at wezterm-gui/src/main.rs:1255
#38 0x00005555560fab91 in wezterm_gui::main () at wezterm-gui/src/main.rs:836

I am also experiencing issues with wgpu, here the vulkan backend though: internal error: entered unreachable code: Fallback system failed to choose present mode. This is a bug. Mode: AutoVsync, Options: []

Protocol error 0 on wp_linux_drm_syncobj_manager_v1 is also spammed in the logs, maybe that helps?

Was working on 555.58.02, which I have reverted to for the time being.

2 Likes

I'm getting a hard freeze with no screen / keyboard / mouse response for around 15 seconds. I spammed CTRL + ALT + F1 to get to a terminal; eventually, after some monitor flickering, the terminal loaded and I was able to restart the lightdm service, which allowed me to recover the OS. Not a true hard freeze, but near enough.

2024-08-29T23:59:21.469912+01:00 steel-legend kernel: NVRM: GPU at PCI:0000:01:00: GPU-788cf272-ea83-2533-a6e5-0f50faf6d992
2024-08-29T23:59:21.469925+01:00 steel-legend kernel: NVRM: Xid (PCI:0000:01:00): 109, pid=6860, name=WoW.exe, Ch 0000002d, errorString CTX SWITCH TIMEOUT, Info 0x6c02a

RTX 3080
9950X
Linux Mint 22
Kernel: 6.10.2-061002-generic
Driver: 560.35.03
Playing World of Warcraft

I also have been getting this. I can’t discern any pattern to reproduce, and sometimes I can go days without seeing it.

Aug 29 10:55:07 kwin_wayland[2254]: kf.windowsystem: static bool KX11Extras::mapViewport() may only be used on X11
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x501: GL_INVALID_VALUE error generated. <levels>, <width> and <height> must be 1 or greater.
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: Invalid framebuffer status:  "GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT"
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x502: GL_INVALID_OPERATION error generated. Framebuffer name must be generated before being bound.
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x506: GL_INVALID_FRAMEBUFFER_OPERATION error generated. Operation is not valid because a bound framebuffer is not framebuffer complete.
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x501: GL_INVALID_VALUE error generated. <levels>, <width> and <height> must be 1 or greater.
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: Invalid framebuffer status:  "GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT"
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x502: GL_INVALID_OPERATION error generated. Framebuffer name must be generated before being bound.
Aug 29 10:55:07 kwin_wayland[2254]: kwin_scene_opengl: 0x506: GL_INVALID_FRAMEBUFFER_OPERATION error generated. Operation is not valid because a bound framebuffer is not framebuffer complete.

----
Aug 29 11:12:55 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug
Aug 29 11:13:00 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug
Aug 29 11:13:05 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug
Aug 29 11:13:10 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug
Aug 29 11:13:15 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug
Aug 29 11:13:20 kwin_wayland[2254]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug

I always see the Pageflip timed out message a few times when this happens. I figured the logs prior to this may be relevant. I’ve been seeing this on 555.X and 560.X. When it happens, everything locks up for a few seconds and I have to close and relaunch World of Warcraft. Kernel version is 6.10.5-1-default

Experiencing the same issue with Unreal Engine 5.4.1 (built from source). No issues when switching to X11

Drivers version: 560.35.03

The stack trace matches. Full stack trace below

Caught signal 11 Segmentation fault

libnvidia-glcore.so.560.35.03!UnknownFunction(0xa10857)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xe279e8)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xe0d37f)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xd4b78d)
libUnrealEditor-VulkanRHI.so!VulkanRHI::FFenceManager::WaitForFence(VulkanRHI::FFence*, unsigned long long) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/VulkanRHI/Private/VulkanMemory.cpp:4690]
libUnrealEditor-VulkanRHI.so!FVulkanCommandBufferManager::WaitForCmdBuffer(FVulkanCmdBuffer*, float) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/VulkanRHI/Private/VulkanCommandBuffer.cpp:502]
libUnrealEditor-VulkanRHI.so!FVulkanViewport::WaitForFrameEventCompletion() [/data/Sources/UnrealEngine/Engine/Source/./Runtime/VulkanRHI/Private/VulkanViewport.cpp:278]
libUnrealEditor-VulkanRHI.so!FVulkanViewport::Present(FVulkanCommandListContext*, FVulkanCmdBuffer*, FVulkanQueue*, FVulkanQueue*, bool) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/VulkanRHI/Private/VulkanViewport.cpp:1048]
libUnrealEditor-VulkanRHI.so!FVulkanCommandListContext::RHIEndDrawingViewport(FRHIViewport*, bool, bool) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/VulkanRHI/Private/VulkanRHI.cpp:1184]
libUnrealEditor-RHI.so!FRHICommand<FRHICommandEndDrawingViewport, FRHICommandEndDrawingViewportString2188>::ExecuteAndDestruct(FRHICommandListBase&, FRHICommandListDebugContext&) [/data/Sources/UnrealEngine/Engine/Source/Runtime/RHI/Public/RHICommandList.h:1295]
libUnrealEditor-RHI.so!FRHICommandListBase::Execute(TRHIPipelineArray<IRHIComputeContext*>&, FRHICommandListBase::FPersistentState::FGPUStats*) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/RHI/Private/RHICommandList.cpp:477]
libUnrealEditor-RHI.so!UE::Core::Private::Function::TFunctionRefCaller<FRHICommandListImmediate::ExecuteAndReset(bool)::$_0, void ()>::Call(void*) [/data/Sources/UnrealEngine/Engine/Source/Runtime/Core/Public/Templates/Function.h:405]
libUnrealEditor-Core.so!TGraphTask<TFunctionGraphTaskImpl<void (), (ESubsequentsMode::Type)0> >::ExecuteTask(TArray<FBaseGraphTask*, TSizedDefaultAllocator<32> >&, ENamedThreads::Type, bool) [/data/Sources/UnrealEngine/Engine/Source/Runtime/Core/Public/Async/TaskGraphInterfaces.h:1235]
libUnrealEditor-Core.so!FNamedTaskThread::ProcessTasksNamedThread(int, bool) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/Core/Private/Async/TaskGraph.cpp:760]
libUnrealEditor-Core.so!FNamedTaskThread::ProcessTasksUntilQuit(int) [/data/Sources/UnrealEngine/Engine/Source/./Runtime/Core/Private/Async/TaskGraph.cpp:650]
libUnrealEditor-RenderCore.so!FRHIThread::Run() [/data/Sources/UnrealEngine/Engine/Source/./Runtime/RenderCore/Private/RenderingThread.cpp:330]
libUnrealEditor-Core.so!FRunnableThreadPThread::Run() [/data/Sources/UnrealEngine/Engine/Source/./Runtime/Core/Private/HAL/PThreadRunnableThread.cpp:25]
libUnrealEditor-Core.so!FRunnableThreadPThread::_ThreadProc(void*) [/data/Sources/UnrealEngine/Engine/Source/Runtime/Core/Private/HAL/PThreadRunnableThread.h:187]
libc.so.6!UnknownFunction(0x9439c)
libc.so.6!UnknownFunction(0x11949b)
1 Like

I'm running Arch Linux with KDE Plasma. These drivers were released as the stable ones, but I found myself having to downgrade to 555 or 550 because 560 is causing really bad FPS drops: by over 40 FPS, and in some games by 100 FPS. According to the update this was rolled out as the stable release, but it doesn't seem that way; games lag a lot too. I ran it on the zen kernel and performance is super bad, but when I switch to the LTS kernel it seems to run a little better.

Also facing the issue with vkcube-wayland failing to run on iGPU on EndeavourOS with Optimus setup of Radeon 680M + GeForce RTX 4060 Laptop (Plasma Wayland, open nvidia driver 560.35.03):

> vkcube-wayland --gpu_number 1
Selected GPU 1: AMD Radeon 680M (RADV REMBRANDT), type: IntegratedGpu
wp_linux_drm_syncobj_manager_v1#24: error 0: surface already exists

No window appears and the process has to be Ctrl+C’d.

glxgears, eglgears_x11, eglgears_wayland, vkcube, vkcubepp do not have any issues with running on iGPU. Problem also disappears if I use supergfxctl to switch to iGPU-only mode.

Can also confirm the issue with running native wayland apps through Vulkan on Intel iGPU + NVIDIA RTX 3060 Mobile, running on Plasma 6.1.4 Wayland, 560.35.03 open variant

vkcube-wayland --gpu_number 0                                                                                                                   
Selected GPU 0: Intel(R) Graphics (ADL GT2), type: IntegratedGpu
wp_linux_drm_syncobj_manager_v1#24: error 0: surface already exists

Can also replicate this with zed editor
Running zeditor --foreground spams Protocol error 0 on object wp_linux_drm_syncobj_manager_v1@45:, but running prime-run zeditor --foreground works

Unreal Engine 5.4.3-34507850+++UE5 using driver version : 560.35.03 running wayland :

libnvidia-glcore.so.560.35.03!UnknownFunction(0xa10857)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xe279e8)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xe0d37f)
libnvidia-glcore.so.560.35.03!UnknownFunction(0xd4b78d)
libUnrealEditor-VulkanRHI.so!VulkanRHI::FFenceManager::WaitForFence(VulkanRHI::FFence*, unsigned long long) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/VulkanRHI/Private/VulkanMemory.cpp:4690]
libUnrealEditor-VulkanRHI.so!FVulkanSwapChain::AcquireImageIndex(VulkanRHI::FSemaphore**) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/VulkanRHI/Private/VulkanSwapChain.cpp:732]
libUnrealEditor-VulkanRHI.so!FVulkanViewport::Present(FVulkanCommandListContext*, FVulkanCmdBuffer*, FVulkanQueue*, FVulkanQueue*, bool) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/VulkanRHI/Private/VulkanViewport.cpp:916]
libUnrealEditor-VulkanRHI.so!FVulkanCommandListContext::RHIEndDrawingViewport(FRHIViewport*, bool, bool) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/VulkanRHI/Private/VulkanRHI.cpp:1184]
libUnrealEditor-RHI.so!FRHICommand<FRHICommandEndDrawingViewport, FRHICommandEndDrawingViewportString2188>::ExecuteAndDestruct(FRHICommandListBase&, FRHICommandListDebugContext&) [/mnt/horde/++UE5/Sync/Engine/Source/Runtime/RHI/Public/RHICommandList.h:1295]
libUnrealEditor-RHI.so!FRHICommandListBase::Execute(TRHIPipelineArray<IRHIComputeContext*>&, FRHICommandListBase::FPersistentState::FGPUStats*) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/RHI/Private/RHICommandList.cpp:477]
libUnrealEditor-RHI.so!UE::Core::Private::Function::TFunctionRefCaller<FRHICommandListImmediate::ExecuteAndReset(bool)::$_0, void ()>::Call(void*) [/mnt/horde/++UE5/Sync/Engine/Source/Runtime/Core/Public/Templates/Function.h:405]
libUnrealEditor-Core.so!TGraphTask<TFunctionGraphTaskImpl<void (), (ESubsequentsMode::Type)0> >::ExecuteTask(TArray<FBaseGraphTask*, TSizedDefaultAllocator<32> >&, ENamedThreads::Type, bool) [/mnt/horde/++UE5/Sync/Engine/Source/Runtime/Core/Public/Async/TaskGraphInterfaces.h:1235]
libUnrealEditor-Core.so!FNamedTaskThread::ProcessTasksNamedThread(int, bool) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/Core/Private/Async/TaskGraph.cpp:760]
libUnrealEditor-Core.so!FNamedTaskThread::ProcessTasksUntilQuit(int) [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/Core/Private/Async/TaskGraph.cpp:650]
libUnrealEditor-RenderCore.so!FRHIThread::Run() [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/RenderCore/Private/RenderingThread.cpp:330]
libUnrealEditor-Core.so!FRunnableThreadPThread::Run() [/mnt/horde/++UE5/Sync/Engine/Source/./Runtime/Core/Private/HAL/PThreadRunnableThread.cpp:25]
libUnrealEditor-Core.so!FRunnableThreadPThread::_ThreadProc(void*) [/mnt/horde/++UE5/Sync/Engine/Source/Runtime/Core/Private/HAL/PThreadRunnableThread.h:187]
libc.so.6!UnknownFunction(0x976d6)
libc.so.6!UnknownFunction(0x11b60b)
1 Like

With 560, none of my monitors connected to the NVIDIA card are detected in Wayland; it just shows a single unknown display at 1024x768.
Monitors connected to my internal radeon GPU work fine.
With 545, all 5 monitors were detected properly, but apps were crashing every time I resized a window.

Forwarding from Arch bugtracker.

Currently using a hybrid laptop with Intel (i5-10750H) as main GPU and 3060 (6GB) as offload. After the latest upgrade, I’ve been getting random hangs when I haven’t been using the Nvidia GPU for a while. Downgrading to 555 fixes this problem.

I’m getting the following message spammed on dmesg:

8月 28 04:19:02 substrate kernel: nvidia-modeset: ERROR: GPU:0: Error while waiting for GPU progress: 0x0000c67d:0 2:2:0:4040

This usually just causes a hitch in my GUI and an inability to use the Nvidia GPU for the session. Eventually, I’ll get messages like these:

 8月 28 04:20:30 substrate kwin_wayland[1454]: kwin_wayland_drm: Pageflip timed out! This is a kernel bug

After which the GUI will hang for longer periods of time (about 30 seconds each time). The rest of the system is still running (pacman updates on terminal, music playing from Firefox or Cider, etc.)

The alternative for me to avoid the crash is to constantly call nvidia-smi somehow (in my case, using the task manager).

Hi, I cannot find the stack trace in pstore logs nor journal after reboot. I have now also tried 560.35.03 and now I get a black screen with blinking cursor after resume from suspend instead of a kernel panic screen.
Downgraded to 550 once again.

2 Likes

Hi @jrelvas , @thesourcehim , @v19930312 , @kuba10j

Yes, this can be reproduced on our systems.
Filed - NVBug #4840658 vkcube-wayland, wayland apps fail to launch on iGPU for Optimus notebooks running Plasma Wayland sessions

I cannot reproduce this on older drivers (< 560.xx). Can you please confirm that this does not reproduce on our 550 and 555 driver series?

Thank you

4 Likes

Can reproduce this as well.
Broken on 560 but still working on 555.58.02.

Ubuntu 24, on X11 - all Proton/Wine games work fine with 555, but with 560 (installed from the Ubuntu graphics drivers PPA) they don’t even start.

3 Likes

Yes, can confirm that it works on 550.107.2, but is broken on 560.35.03

I also suffer from a kernel crash after resuming the system. The kernel has been crashing randomly since version 555.
The graphics driver crashes on every resume on all of my NVIDIA-GPU-powered machines.
Rolling back to version 550 is a viable workaround for the time being.

nvidia-bug-report.log.gz (366.5 KB)

dmesg output

[ 132.230256] ------------[ cut here ]------------
[ 132.230264] WARNING: CPU: 0 PID: 4129 at include/linux/rwsem.h:80 follow_pte+0x1f0/0x220
[ 132.230284] Modules linked in: binfmt_misc snd_seq_dummy snd_hrtimer nvidia_drm(POE) nvidia_modeset(POE) nvidia_uvm(POE) vboxnetadp(OE) vboxnetflt(OE) nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 vboxdrv(OE) nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nvidia(POE) nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables qrtr bnep sunrpc snd_sof_pci_intel_cnl snd_sof_intel_hda_generic soundwire_intel soundwire_cadence snd_sof_intel_hda_common snd_sof_intel_hda_mlink snd_sof_intel_hda snd_sof_pci snd_sof_xtensa_dsp dell_rbtn snd_sof snd_sof_utils soundwire_generic_allocation soundwire_bus snd_hda_codec_hdmi snd_soc_avs iwlmvm snd_soc_hda_codec snd_soc_skl vfat snd_soc_hdac_hda fat snd_ctl_led snd_hda_ext_core snd_soc_sst_ipc intel_uncore_frequency snd_hda_codec_realtek snd_soc_sst_dsp intel_uncore_frequency_common snd_soc_acpi_intel_match intel_tcc_cooling snd_soc_acpi snd_soc_core snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp snd_hda_scodec_component mac80211
[ 132.230423] snd_compress ac97_bus coretemp spi_nor dell_laptop snd_pcm_dmaengine kvm_intel r8153_ecm uvcvideo iTCO_wdt intel_pmc_bxt cdc_ether uvc mei_wdt mtd iTCO_vendor_support ee1004 mei_pxp mei_hdcp libarc4 usbnet videobuf2_vmalloc intel_rapl_msr dell_smm_hwmon snd_hda_intel kvm snd_intel_dspcfg dell_wmi snd_intel_sdw_acpi videobuf2_memops snd_usb_audio snd_hda_codec videobuf2_v4l2 videobuf2_common snd_usbmidi_lib rapl dell_smbios btusb snd_ump iwlwifi snd_hda_core intel_cstate snd_rawmidi dcdbas videodev btrtl snd_hwdep btintel snd_seq r8152 btbcm snd_seq_device dell_wmi_sysman btmtk intel_uncore mii pcspkr typec_displayport dell_wmi_descriptor intel_wmi_thunderbolt firmware_attributes_class wmi_bmof processor_thermal_device_pci_legacy snd_pcm bluetooth cdc_acm mc cfg80211 thunderbolt snd_timer i2c_i801 spi_intel_pci processor_thermal_device snd i2c_smbus spi_intel processor_thermal_wt_hint processor_thermal_rfim soundcore mei_me processor_thermal_rapl intel_rapl_common rfkill mei processor_thermal_wt_req idma64
[ 132.230611] processor_thermal_power_floor processor_thermal_mbox intel_pch_thermal intel_soc_dts_iosf int3403_thermal intel_pmc_core int340x_thermal_zone dell_smo8800 intel_vsec int3400_thermal pmt_telemetry intel_hid acpi_thermal_rel pmt_class sparse_keymap acpi_pad joydev squashfs loop nfnetlink zram i915 i2c_algo_bit drm_buddy rtsx_pci_sdmmc nvme crct10dif_pclmul ttm crc32_pclmul crc32c_intel polyval_clmulni mmc_core polyval_generic nvme_core drm_display_helper ghash_clmulni_intel hid_multitouch sha512_ssse3 ucsi_acpi mxm_wmi sha256_ssse3 typec_ucsi sha1_ssse3 rtsx_pci cec nvme_auth typec i2c_hid_acpi i2c_hid video wmi pinctrl_cannonlake serio_raw fuse
[ 132.230718] CPU: 0 PID: 4129 Comm: nvidia-sleep.sh Tainted: P OE 6.10.6-200.fc40.x86_64 #1
[ 132.230727] Hardware name: Dell Inc. Precision 5540/024X86, BIOS 1.27.0 02/01/2024
[ 132.230732] RIP: 0010:follow_pte+0x1f0/0x220
[ 132.230742] Code: cc cc cc 48 81 e2 00 00 00 c0 48 09 c2 48 f7 d2 48 85 fa 75 20 e8 a0 f4 ff ff 48 8b 35 59 28 86 01 48 81 e6 00 00 00 c0 eb 89 <0f> 0b 48 3b 1f 0f 83 42 fe ff ff bd ea ff ff ff eb b2 49 8b 3c 24
[ 132.230748] RSP: 0018:ffffae47471737e8 EFLAGS: 00010246
[ 132.230756] RAX: 0000000000000000 RBX: 00007f308a012000 RCX: ffffae4747173830
[ 132.230762] RDX: ffffae4747173828 RSI: 00007f308a012000 RDI: ffff9e3135274790
[ 132.230767] RBP: ffffae4747173870 R08: ffffae47471739c8 R09: 0000000000000000
[ 132.230772] R10: 0000000000000002 R11: 0000000000000042 R12: ffffae4747173830
[ 132.230777] R13: ffffae4747173828 R14: ffff9e310c912680 R15: 0000000000000000
[ 132.230782] FS: 00007f5ee2d1c740(0000) GS:ffff9e385be00000(0000) knlGS:0000000000000000
[ 132.230788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 132.230793] CR2: 00005624732e1358 CR3: 00000001894ec004 CR4: 00000000003706f0
[ 132.230798] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 132.230802] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 132.230806] Call Trace:
[ 132.230811]
[ 132.230814] ? follow_pte+0x1f0/0x220
[ 132.230822] ? __warn.cold+0x8e/0xe8
[ 132.230833] ? follow_pte+0x1f0/0x220
[ 132.230844] ? report_bug+0xff/0x140
[ 132.230856] ? handle_bug+0x3c/0x80
[ 132.230865] ? exc_invalid_op+0x17/0x70
[ 132.230874] ? asm_exc_invalid_op+0x1a/0x20
[ 132.230885] ? follow_pte+0x1f0/0x220
[ 132.230894] follow_phys+0x49/0x110
[ 132.230907] untrack_pfn+0x55/0x120
[ 132.230919] unmap_single_vma+0xa6/0xe0
[ 132.230929] zap_page_range_single+0x122/0x1d0
[ 132.230943] unmap_mapping_range+0x116/0x140
[ 132.230955] nv_revoke_gpu_mappings_locked+0x47/0x70 [nvidia]
[ 132.232686] nv_set_system_power_state+0x1cd/0x470 [nvidia]
[ 132.234355] nv_procfs_write_suspend+0x105/0x1b0 [nvidia]
[ 132.235098] proc_reg_write+0x5a/0xa0
[ 132.235103] vfs_write+0xf5/0x460
[ 132.235107] ? post_alloc_hook+0xe1/0x130
[ 132.235111] ksys_write+0x6d/0xf0
[ 132.235114] do_syscall_64+0x82/0x160
[ 132.235118] ? page_counter_uncharge+0x33/0x80
[ 132.235121] ? drain_stock+0x68/0xa0
[ 132.235124] ? __refill_stock+0x81/0x90
[ 132.235126] ? refill_stock+0x1a/0x30
[ 132.235128] ? try_charge_memcg+0x7ae/0x840
[ 132.235132] ? __mem_cgroup_threshold+0x15/0x150
[ 132.235134] ? __mod_memcg_lruvec_state+0xe5/0x1e0
[ 132.235137] ? __lruvec_stat_mod_folio+0x68/0xa0
[ 132.235140] ? set_ptes.isra.0+0x28/0x90
[ 132.235143] ? do_anonymous_page+0xf8/0x8b0
[ 132.235146] ? __pte_offset_map+0x1b/0x180
[ 132.235149] ? __handle_mm_fault+0xbfb/0x1070
[ 132.235153] ? filp_close+0x1d/0x30
[ 132.235157] ? __count_memcg_events+0x75/0x130
[ 132.235160] ? count_memcg_events.constprop.0+0x1a/0x30
[ 132.235163] ? handle_mm_fault+0x1f0/0x300
[ 132.235167] ? do_user_addr_fault+0x36c/0x620
[ 132.235171] ? clear_bhb_loop+0x25/0x80
[ 132.235175] ? clear_bhb_loop+0x25/0x80
[ 132.235177] ? clear_bhb_loop+0x25/0x80
[ 132.235180] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 132.235184] RIP: 0033:0x7f5ee2e2d984
[ 132.235206] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d c5 06 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89
[ 132.235208] RSP: 002b:00007ffd4340df48 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
[ 132.235211] RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f5ee2e2d984
[ 132.235213] RDX: 0000000000000008 RSI: 00005624732e0f50 RDI: 0000000000000001
[ 132.235215] RBP: 00007ffd4340df70 R08: 0000000000000410 R09: 0000000000000001
[ 132.235216] R10: 0000000000000004 R11: 0000000000000202 R12: 0000000000000008
[ 132.235218] R13: 00005624732e0f50 R14: 00007f5ee2f075c0 R15: 00007f5ee2f04f00
[ 132.235221]
[ 132.235222] ---[ end trace 0000000000000000 ]---
[ 132.235245] ------------[ cut here ]------------

@amrits @abchauhan Hey guys, were you able to reproduce this yet?

1 Like