Fedora 32 Workstation using NVIDIA Quadro P5000 Mobile - blank external monitor

The laptop is a Dell Precision 7720. I was previously running Fedora 30 with a docking station and 3 external monitors without any problem. After upgrading to Fedora 32, the external monitors just stay blank. xrandr fails with the error “xrandr: Configure crtc 4 failed”. I have already tried changing the crtc, but the same error is returned. I’ve been battling with this since April. Any help will be appreciated.

Some highlighted errors:
Aug 13 10:18:46 FCW32 /usr/libexec/gdm-x-session[2939]: randr: falling back to unsynchronized pixmap sharing
Aug 13 10:18:46 FCW32 /usr/libexec/gdm-x-session[2939]: (EE) modeset(G0): failed to set mode: No space left on device

nvidia-bug-file : nvidia-bug-report.log (3.9 MB)

After a month of battling with this problem, I’ve finally solved the mystery.
I had to uninstall everything related to the NVIDIA driver, including the conf files from:

/usr/lib/udev/rules.d/
/lib/udev/rules.d/
/usr/share/X11/xorg.conf.d/

Reboot the machine, then install the following driver/tools:

$ rpm -qa | grep nvidia
nvidia-libXNVCtrl-450.66-1.fc32.x86_64
xorg-x11-drv-nvidia-devel-450.66-1.fc32.x86_64
kmod-nvidia-5.7.16-200.fc32.x86_64-450.66-1.fc32.x86_64
nvidia-query-resource-opengl-1.0.0-7.fc32.x86_64
nvidia-driver-cuda-450.66-1.fc32.x86_64
nvidia-libXNVCtrl-devel-450.66-1.fc32.x86_64
nvidia-modprobe-450.66-1.fc32.x86_64
akmod-nvidia-450.66-1.fc32.x86_64
nvidia-driver-NVML-450.66-1.fc32.x86_64
nvidia-settings-450.66-1.fc32.x86_64
xorg-x11-drv-nvidia-cuda-libs-450.66-1.fc32.x86_64
nvidia-texture-tools-2.0.8-22.fc32.x86_64
nvidia-persistenced-450.66-1.fc32.x86_64
xorg-x11-drv-nvidia-libs-450.66-1.fc32.x86_64
xorg-x11-drv-nvidia-kmodsrc-450.66-1.fc32.x86_64
nvidia-driver-cuda-libs-450.66-1.fc32.x86_64
nvidia-texture-tools-devel-2.0.8-22.fc32.x86_64
nvidia-kmod-common-450.66-1.fc32.noarch
nvidia-driver-NvFBCOpenGL-450.66-1.fc32.x86_64
pcp-pmda-nvidia-gpu-5.2.0-1.fc32.x86_64
nvidia-query-resource-opengl-lib-1.0.0-7.fc32.x86_64
nvidia-xconfig-450.66-1.fc32.x86_64

Install the long-lived branch driver 450.66 from NVIDIA:

Get it here - https://forums.developer.nvidia.com/t/linux-solaris-and-freebsd-driver-450-66-long-lived-branch-release/147366

Edit /etc/X11/xorg.conf (I have 3 external monitors + 1 laptop display):

Section "ServerLayout"
    Identifier     "Layout0"
    Screen      0  "Screen0" 0 0
    InputDevice    "Keyboard0" "CoreKeyboard"
    InputDevice    "Mouse0" "CorePointer"
    Option         "Xinerama" "0"
EndSection

Section "Files"
    ModulePath      "/usr/lib64/xorg/modules"
    FontPath        "catalogue:/etc/X11/fontpath.d"
    FontPath        "built-ins"
EndSection

Section "Module"
    Load           "dri2"
    Load           "glamoregl"
    Load           "modesetting"
    Load           "glx"
EndSection

Section "InputDevice"

    # generated from default
    Identifier     "Mouse0"
    Driver         "mouse"
    Option         "Protocol" "auto"
    Option         "Device" "/dev/input/mice"
    Option         "Emulate3Buttons" "no"
    Option         "ZAxisMapping" "4 5"
EndSection

Section "InputDevice"
    # generated from default
    Identifier     "Keyboard0"
    Driver         "kbd"
EndSection

Section "Monitor"
    # HorizSync source: builtin, VertRefresh source: builtin
    Identifier     "Monitor0"
    VendorName     "DELL"
    ModelName      "DELL U2311H"
    HorizSync       30.0 - 83.0
    VertRefresh     56.0 - 76.0
    Option         "DPMS"
EndSection

Section "Monitor"
    # HorizSync source: builtin, VertRefresh source: builtin
    Identifier     "Monitor1"
    VendorName     "DELL"
    ModelName      "DELL U2311H"
    HorizSync       28.0 - 55.0
    VertRefresh     43.0 - 72.0
    Option         "DPMS"
EndSection

Section "Monitor"

    # HorizSync source: builtin, VertRefresh source: builtin
    Identifier     "Monitor2"
    VendorName     "SAMSUNG"
    ModelName      "C27F390"
    HorizSync       28.0 - 55.0
    VertRefresh     43.0 - 72.0
    Option         "DPMS"
EndSection

Section "Monitor"
    Identifier     "Monitor3"
    Option         "DPMS"
    VendorName     "SHP"
    ModelName      "1446"
EndSection

Section "Device"
    Identifier     "Device0"
    Driver         "nvidia"
    VendorName     "NVIDIA Corporation"
    BoardName      "Quadro P5000"
    BusID          "PCI:1@0:0:0"
EndSection

Section "Device"
    Identifier     "Device1"
    Driver         "intel"
    BusID          "PCI:0@0:2:0"
EndSection

Section "Screen"
    Identifier     "Screen0"
    Device         "Device0"
    Monitor        "Monitor0"
    DefaultDepth    24
    Option         "nvidiaXineramaInfoOrder" "DFP-7"
    Option         "Stereo" "0"
    Option         "metamodes" "DP-6.2: 1920x1080_60 +5888+0, DP-6.3: 1920x1080_60 +0+36, DP-8.1: 1920x1080_60 +1920+36"
    Option         "SLI" "Off"
    Option         "MultiGPU" "Off"
    Option         "BaseMosaic" "off"
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection

Section "Screen"
    Identifier     "Screen1"
    Device         "Device0"
    Monitor        "Monitor2"
    DefaultDepth    24
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection

Section "Screen"
    Identifier     "Screen2"
    Device         "Device0"
    Monitor        "Monitor1"
    DefaultDepth    24
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection

Section "Screen"
    Identifier     "Screen3"
    Device         "Device1"
    Monitor        "Monitor3"
    DefaultDepth    24
    SubSection     "Display"
        Depth       24
    EndSubSection
EndSection

Reboot, and… still not running… :(
I had to manually do :

[ctrl] + [alt] + [F4]
$ init 3
$ modprobe nvidia-drm
$ init 5

Then I got all displays running…
I’m still trying to figure out why I had to run modprobe manually… why doesn’t the driver do it for me?
If somebody has the answer, please point it out to me…

Hi BTM,

I have been trying to recreate the issue but have had no luck so far. I just wanted to check with you whether the issue is reproducible with a single monitor connected.

Hi @amrits

It’s because of mixing the nvidia-driver* and xorg-x11-drv-nvidia* driver packages…
I’m now using kernel 5.7.17-200.fc32.x86_64 with only the xorg-x11-drv-nvidia* packages, together with bumblebee.
It runs okay now. But sometimes the PRIME display (eDP1) does not show up… only the external monitor works.

Does this problem also occur when you connect only one external display?

Yup, the same…

This is using only one external monitor. I lose the PRIME monitor (eDP1)…
current nvidia-bug-report - nvidia-bug-report.log.gz (1.6 MB)

OK, it looks like sometimes you are losing the PRIME monitor and sometimes you are losing the external monitor.
Please confirm if my understanding is correct.

Hi @amrits,
Yes, I can confirm that. The other thing is, if I’m only using the laptop eDP1 display, I can access the NVIDIA GPU for some period of time. After that, the NVIDIA GPU just stops responding.

Hi @amrits,

I noticed that from time to time, my NVIDIA Quadro P5000 just goes dead with the following error messages:

Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: Root Port link has been reset
Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: device recovery successful
Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: Multiple Uncorrected (Fatal) error received: 0000:01:00.1
Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, (Requester ID)
Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: device [8086:1901] error status/mask=00004000/00000000
Sep 20 21:13:40 fcw32 kernel: pcieport 0000:00:01.0: AER: [14] CmpltTO (First)
Sep 20 21:13:40 fcw32 kernel: snd_hda_intel 0000:01:00.1: AER: PCIe Bus Error: severity=Uncorrected (Fatal), type=Inaccessible, (Unregistered Agent ID)
Sep 20 21:13:40 fcw32 kernel: snd_hda_intel 0000:01:00.1: AER: Error of this Agent is reported first
Sep 20 21:13:40 fcw32 kernel: nvidia 0000:01:00.0: AER: can’t recover (no error_detected callback)
Sep 20 21:13:40 fcw32 kernel: snd_hda_intel 0000:01:00.1: AER: can’t recover (no error_detected callback)
Sep 20 21:13:40 fcw32 kernel: nvidia 0000:01:00.0: can’t change power state from D0 to D3hot (config space inaccessible)
Sep 20 21:13:41 fcw32 kernel: pcieport 0000:00:01.0: AER: Root Port link has been reset
Sep 20 21:13:41 fcw32 kernel: pcieport 0000:00:01.0: AER: device recovery successful
Sep 20 21:13:41 fcw32 kernel: nvidia 0000:01:00.0: AER: can’t recover (no error_detected callback)
Sep 20 21:13:41 fcw32 kernel: snd_hda_intel 0000:01:00.1: AER: can’t recover (no error_detected callback)

I can’t run modprobe, as the system will freeze…
I have to reboot to make the GPU active again…
Is this a symptom that the GPU is failing?

syslog attached - nvidia-dead.log (84.7 KB)

Hi btm,
This looks like a different issue from the one originally reported.
Are you performing any specific steps that produce these error messages?

Hi @amrits,

The steps are pretty straightforward.
Run this command:

$optirun glxspheres64

It runs well…
Then kill it with [ctrl] + [c]…
Then restart the command… and it gives an error message saying that there is no secondary GPU…
I checked /var/log/messages and found this error.
Restarting the NVIDIA driver with

$modprobe nvidia-drm

makes my laptop freeze… I had to press the power button for 5 seconds to reboot. This has happened mostly without an external monitor attached…
I don’t know whether this is related to the NVIDIA driver package, to Fedora 32 itself, or whether my GPU is nearly dead…