I am trying to create a QEMU/KVM virtual machine for an H100. The isolation and passthrough of the GPU worked fine, as I’ve done it before with an RTX 3080; I mostly used this guide to configure the host system.
Host system configuration:
- OS: Ubuntu 22.04.4 LTS x86_64
- Host: RS720A-E12-RS12 01
- Kernel: 6.5.0-25-generic
- CPU: 2x AMD EPYC 9554 (256) @ 3.100GHz
- GPU: NVIDIA H100 PCIe
- GPU: NVIDIA L40
- Memory: 773694MiB
VM configuration (libvirt domain XML):
<domain xmlns:qemu="http://libvirt.org/schemas/domain/qemu/1.0" type="kvm">
  <!--
    libvirt domain definition for a Q35/OVMF guest with an NVIDIA H100 PCIe
    passed through via VFIO (host device 0000:a1:00.0, guest 0000:05:00.0).
    The qemu namespace declared on the root element is required for the
    qemu:commandline element at the end of this definition.
  -->
  <name>VM-H100</name>
  <uuid>7b401d92-c418-4b4c-a9cd-2377ccd6bd9a</uuid>
  <metadata>
    <libosinfo:libosinfo xmlns:libosinfo="http://libosinfo.org/xmlns/libvirt/domain/1.0">
      <libosinfo:os id="http://ubuntu.com/ubuntu/22.04"/>
    </libosinfo:libosinfo>
  </metadata>
  <memory unit="KiB">195311616</memory>
  <currentMemory unit="KiB">195311616</currentMemory>
  <vcpu placement="static">64</vcpu>
  <os>
    <type arch="x86_64" machine="pc-q35-6.2">hvm</type>
    <loader readonly="yes" secure="yes" type="pflash">/usr/share/OVMF/OVMF_CODE_4M.secboot.fd</loader>
    <nvram>/var/lib/libvirt/qemu/nvram/VM-H100_VARS.fd</nvram>
    <boot dev="hd"/>
  </os>
  <features>
    <acpi/>
    <apic/>
    <vmport state="off"/>
    <smm state="on"/>
  </features>
  <cpu mode="host-passthrough" check="none" migratable="on">
    <topology sockets="1" dies="1" cores="32" threads="2"/>
  </cpu>
  <clock offset="utc">
    <timer name="rtc" tickpolicy="catchup"/>
    <timer name="pit" tickpolicy="delay"/>
    <timer name="hpet" present="no"/>
  </clock>
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>
  <pm>
    <suspend-to-mem enabled="no"/>
    <suspend-to-disk enabled="no"/>
  </pm>
  <devices>
    <emulator>/usr/bin/qemu-system-x86_64</emulator>
    <disk type="file" device="disk">
      <driver name="qemu" type="qcow2"/>
      <source file="/var/lib/libvirt/images/VMs/H100-vm.qcow2"/>
      <target dev="vda" bus="virtio"/>
      <address type="pci" domain="0x0000" bus="0x04" slot="0x00" function="0x0"/>
    </disk>
    <controller type="usb" index="0" model="qemu-xhci" ports="15">
      <address type="pci" domain="0x0000" bus="0x02" slot="0x00" function="0x0"/>
    </controller>
    <controller type="pci" index="0" model="pcie-root"/>
    <!-- PCIe root ports 1-8: functions 0-7 of guest slot 00:02 -->
    <controller type="pci" index="1" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="1" port="0x10"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x0" multifunction="on"/>
    </controller>
    <controller type="pci" index="2" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="2" port="0x11"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x1"/>
    </controller>
    <controller type="pci" index="3" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="3" port="0x12"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x2"/>
    </controller>
    <controller type="pci" index="4" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="4" port="0x13"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x3"/>
    </controller>
    <!-- Root port for guest bus 0x05, which carries the passed-through GPU -->
    <controller type="pci" index="5" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="5" port="0x14"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x4"/>
    </controller>
    <controller type="pci" index="6" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="6" port="0x15"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x5"/>
    </controller>
    <controller type="pci" index="7" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="7" port="0x16"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x6"/>
    </controller>
    <controller type="pci" index="8" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="8" port="0x17"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x02" function="0x7"/>
    </controller>
    <!-- PCIe root ports 9-14: functions 0-5 of guest slot 00:03 -->
    <controller type="pci" index="9" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="9" port="0x18"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x0" multifunction="on"/>
    </controller>
    <controller type="pci" index="10" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="10" port="0x19"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x1"/>
    </controller>
    <controller type="pci" index="11" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="11" port="0x1a"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x2"/>
    </controller>
    <controller type="pci" index="12" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="12" port="0x1b"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x3"/>
    </controller>
    <controller type="pci" index="13" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="13" port="0x1c"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x4"/>
    </controller>
    <controller type="pci" index="14" model="pcie-root-port">
      <model name="pcie-root-port"/>
      <target chassis="14" port="0x1d"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x03" function="0x5"/>
    </controller>
    <controller type="sata" index="0">
      <address type="pci" domain="0x0000" bus="0x00" slot="0x1f" function="0x2"/>
    </controller>
    <controller type="virtio-serial" index="0">
      <address type="pci" domain="0x0000" bus="0x03" slot="0x00" function="0x0"/>
    </controller>
    <interface type="network">
      <mac address="52:54:00:dd:54:37"/>
      <source network="default"/>
      <model type="virtio"/>
      <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/>
    </interface>
    <serial type="pty">
      <target type="isa-serial" port="0">
        <model name="isa-serial"/>
      </target>
    </serial>
    <console type="pty">
      <target type="serial" port="0"/>
    </console>
    <channel type="unix">
      <target type="virtio" name="org.qemu.guest_agent.0"/>
      <address type="virtio-serial" controller="0" bus="0" port="1"/>
    </channel>
    <channel type="spicevmc">
      <target type="virtio" name="com.redhat.spice.0"/>
      <address type="virtio-serial" controller="0" bus="0" port="2"/>
    </channel>
    <input type="tablet" bus="usb">
      <address type="usb" bus="0" port="1"/>
    </input>
    <input type="mouse" bus="ps2"/>
    <input type="keyboard" bus="ps2"/>
    <graphics type="spice" autoport="yes">
      <listen type="address"/>
      <image compression="off"/>
    </graphics>
    <sound model="ich9">
      <address type="pci" domain="0x0000" bus="0x00" slot="0x1b" function="0x0"/>
    </sound>
    <audio id="1" type="spice"/>
    <video>
      <model type="qxl" ram="65536" vram="65536" vgamem="16384" heads="1" primary="yes"/>
      <address type="pci" domain="0x0000" bus="0x00" slot="0x01" function="0x0"/>
    </video>
    <!-- H100 passthrough: host 0000:a1:00.0 appears in the guest as 0000:05:00.0 -->
    <hostdev mode="subsystem" type="pci" managed="yes">
      <driver name="vfio"/>
      <source>
        <address domain="0x0000" bus="0xa1" slot="0x00" function="0x0"/>
      </source>
      <rom bar="on"/>
      <address type="pci" domain="0x0000" bus="0x05" slot="0x00" function="0x0"/>
    </hostdev>
    <redirdev bus="usb" type="spicevmc">
      <address type="usb" bus="0" port="2"/>
    </redirdev>
    <redirdev bus="usb" type="spicevmc">
      <address type="usb" bus="0" port="3"/>
    </redirdev>
    <memballoon model="virtio">
      <address type="pci" domain="0x0000" bus="0x06" slot="0x00" function="0x0"/>
    </memballoon>
    <rng model="virtio">
      <backend model="random">/dev/urandom</backend>
      <address type="pci" domain="0x0000" bus="0x07" slot="0x00" function="0x0"/>
    </rng>
  </devices>
  <!--
    Enlarge the 64-bit PCI MMIO aperture that OVMF reserves for the guest.
    The guest kernel log shows the GPU requesting a 128 GiB prefetchable
    BAR 2 (size 0x2000000000) while the root port for bus 05 only has a
    2 MiB 32-bit memory window, so every 64-bit BAR is left unassigned and
    the NVIDIA driver reports "BAR0 is 0M @ 0x0".
    The value is in MiB: 262144 MiB = 256 GiB, the next power of two above
    the 128 GiB BAR plus the two smaller BARs.
    NOTE(review): assumes the installed OVMF build honours the experimental
    opt/ovmf/X-PciMmio64Mb fw_cfg knob; confirm against the edk2 version.
  -->
  <qemu:commandline>
    <qemu:arg value="-fw_cfg"/>
    <qemu:arg value="opt/ovmf/X-PciMmio64Mb,string=262144"/>
  </qemu:commandline>
</domain>
(Mostly auto-generated by virt-manager.)
In the VM the GPU gets passed through, and I installed the CUDA toolkit and the NVIDIA drivers.
After that I restarted the VM, but the GPU had not loaded the kernel modules I wanted it to load:
$ lspci -knn | grep -A2 NVIDIA
05:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 PCIe] [10de:2331] (rev a1)
Subsystem: NVIDIA Corporation Device [10de:1626]
Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia
The guest's dmesg shows this:
[...]
[ 0.876629] pci 0000:05:00.0: [10de:2331] type 00 class 0x030200
[ 0.876845] pci 0000:05:00.0: reg 0x10: [mem 0xffffffffff000000-0xffffffffffffffff 64bit pref]
[ 0.876899] pci 0000:05:00.0: reg 0x18: [mem 0xffffffe000000000-0xffffffffffffffff 64bit pref]
[ 0.876953] pci 0000:05:00.0: reg 0x20: [mem 0xfffffffffe000000-0xffffffffffffffff 64bit pref]
[ 0.877094] pci 0000:05:00.0: Enabling HDA controller
[ 0.880001] pci 0000:05:00.0: 252.048 Gb/s available PCIe bandwidth, limited by 16.0 GT/s PCIe x16 link at 0000:00:02.4 (capable of 504.112 Gb/s with 32.0 GT/s PCIe x16 link)
[...]
[ 1.015965] pci 0000:05:00.0: can't claim BAR 0 [mem 0xffffffffff000000-0xffffffffffffffff 64bit pref]: no compatible bridge window
[ 1.015968] pci 0000:05:00.0: can't claim BAR 2 [mem 0xffffffe000000000-0xffffffffffffffff 64bit pref]: no compatible bridge window
[ 1.015970] pci 0000:05:00.0: can't claim BAR 4 [mem 0xfffffffffe000000-0xffffffffffffffff 64bit pref]: no compatible bridge window
[...]
[ 1.083355] pci 0000:05:00.0: BAR 2: no space for [mem size 0x2000000000 64bit pref]
[ 1.083358] pci 0000:05:00.0: BAR 2: failed to assign [mem 0xffffffe000000000-0xffffffffffffffff 64bit pref]
[ 1.083360] pci 0000:05:00.0: BAR 4: no space for [mem size 0x02000000 64bit pref]
[ 1.083362] pci 0000:05:00.0: BAR 4: failed to assign [mem 0xfffffffffe000000-0xffffffffffffffff 64bit pref]
[ 1.083364] pci 0000:05:00.0: BAR 0: no space for [mem size 0x01000000 64bit pref]
[ 1.083366] pci 0000:05:00.0: BAR 0: failed to assign [mem 0xffffffffff000000-0xffffffffffffffff 64bit pref]
[...]
[ 1.118487] pci_bus 0000:05: resource 0 [io 0xf000-0xffff]
[ 1.118488] pci_bus 0000:05: resource 1 [mem 0xc9200000-0xc93fffff]
[...]
[ 3.239283] nvidia: loading out-of-tree module taints kernel.
[ 3.239294] nvidia: module license 'NVIDIA' taints kernel.
[ 3.239296] Disabling lock debugging due to kernel taint
[ 3.239299] nvidia: module verification failed: signature and/or required key missing - tainting kernel
[ 3.239301] nvidia: module license taints kernel.
[ 3.323123] nvidia-nvlink: Nvlink Core is being initialized, major device number 237
[ 3.323134] NVRM: This PCI I/O region assigned to your NVIDIA device is invalid:
NVRM: BAR0 is 0M @ 0x0 (PCI:0000:05:00.0)
The last message is repeated for all NVIDIA kernel modules and is output in a loop.
I have activated “Above 4G Decoding” in the host BIOS.
I know that it is a problem with the address space, but I don’t know how to fix it. I would be very thankful for some help.
The output of nvidia-bug-report.sh:
nvidia-bug-report.log.gz (1.0 MB)