Hi,
I have 4 RTX 3080 graphics cards, each running one process at a time. No monitors are connected.
I always get an error that freezes the computer after roughly one hour. I have tried different xorg.conf configurations without success.
The temperatures are fine. These are the readings while the processes are running:
nvidia-smi --query-gpu=name,gpu_bus_id,utilization.gpu,utilization.memory,temperature.gpu,fan.speed,power.draw --format=csv
name, pci.bus_id, utilization.gpu [%], utilization.memory [%], temperature.gpu, fan.speed [%], power.draw [W]
NVIDIA GeForce RTX 3080, 00000000:05:00.0, 100 %, 100 %, 65, 76 %, 269.89 W
NVIDIA GeForce RTX 3080, 00000000:08:00.0, 100 %, 100 %, 53, 65 %, 207.35 W
NVIDIA GeForce RTX 3080, 00000000:09:00.0, 100 %, 100 %, 62, 72 %, 270.00 W
NVIDIA GeForce RTX 3080, 00000000:0A:00.0, 100 %, 100 %, 57, 70 %, 249.55 W
Xorg configuration:
# nvidia-xconfig: X configuration file generated by nvidia-xconfig
# nvidia-xconfig: version 470.57.02
Section "ServerLayout"
Identifier "Layout0"
Screen 0 "intel"
Screen 1 "Screen0"
Screen 2 "Screen1" RightOf "Screen0"
Screen 3 "Screen2" RightOf "Screen1"
Screen 4 "Screen3" RightOf "Screen2"
EndSection
Section "Device"
Identifier "intel"
Driver "intel"
BusID "PCI:0@0:2:0"
EndSection
Section "Screen"
Identifier "intel"
Device "intel"
EndSection
Section "Monitor"
Identifier "Monitor0"
VendorName "Unknown"
ModelName "Unknown"
Option "DPMS"
EndSection
Section "Monitor"
Identifier "Monitor1"
VendorName "Unknown"
ModelName "Unknown"
Option "DPMS"
EndSection
Section "Monitor"
Identifier "Monitor2"
VendorName "Unknown"
ModelName "Unknown"
Option "DPMS"
EndSection
Section "Monitor"
Identifier "Monitor3"
VendorName "Unknown"
ModelName "Unknown"
Option "DPMS"
EndSection
Section "Device"
Identifier "Device0"
Driver "nvidia"
VendorName "NVIDIA Corporation"
BoardName "NVIDIA GeForce RTX 3080"
BusID "PCI:5:0:0"
EndSection
Section "Device"
Identifier "Device1"
Driver "nvidia"
VendorName "NVIDIA Corporation"
BoardName "NVIDIA GeForce RTX 3080"
BusID "PCI:9:0:0"
EndSection
Section "Device"
Identifier "Device2"
Driver "nvidia"
VendorName "NVIDIA Corporation"
BoardName "NVIDIA GeForce RTX 3080"
BusID "PCI:8:0:0"
EndSection
Section "Device"
Identifier "Device3"
Driver "nvidia"
VendorName "NVIDIA Corporation"
BoardName "NVIDIA GeForce RTX 3080"
BusID "PCI:10:0:0"
EndSection
Section "Screen"
Identifier "Screen0"
Device "Device0"
Monitor "Monitor0"
DefaultDepth 24
Option "AllowEmptyInitialConfiguration" "True"
Option "Coolbits" "28"
SubSection "Display"
Depth 24
EndSubSection
EndSection
Section "Screen"
Identifier "Screen1"
Device "Device1"
Monitor "Monitor1"
DefaultDepth 24
Option "AllowEmptyInitialConfiguration" "True"
Option "Coolbits" "28"
SubSection "Display"
Depth 24
EndSubSection
EndSection
Section "Screen"
Identifier "Screen2"
Device "Device2"
Monitor "Monitor2"
DefaultDepth 24
Option "AllowEmptyInitialConfiguration" "True"
Option "Coolbits" "28"
SubSection "Display"
Depth 24
EndSubSection
EndSection
Section "Screen"
Identifier "Screen3"
Device "Device3"
Monitor "Monitor3"
DefaultDepth 24
Option "AllowEmptyInitialConfiguration" "True"
Option "Coolbits" "28"
SubSection "Display"
Depth 24
EndSubSection
EndSection
This is the output in the Syslog:
Thank you