ib_write_bw: local protection error when size > 65536 in KVM

Hello,
I have a server with a ConnectX-7 adapter and have configured PCI passthrough of it into a KVM virtual machine.
ib_write_bw works inside the virtual machine with a message size of 65536:

root@h100-ubuntu:~/perftest# ./ib_write_bw -d mlx5_1  -s 65536 10.160.11.0 -n 2000000
 WARNING: BW peak won't be measured in this run.
---------------------------------------------------------------------------------------
                    RDMA_Write BW Test
 Dual-port       : OFF          Device         : mlx5_1
 Number of qps   : 1            Transport type : IB
 Connection type : RC           Using SRQ      : OFF
 PCIe relax order: ON
 ibv_wr* API     : ON
 TX depth        : 128
 CQ Moderation   : 1
 Mtu             : 4096[B]
 Link type       : IB
 Max inline data : 0[B]
 rdma_cm QPs     : OFF
 Data ex. method : Ethernet
---------------------------------------------------------------------------------------
 local address: LID 0x12 QPN 0x004c PSN 0x1b7ee6 RKey 0x1fff00 VAddr 0x007f98b1955000
 remote address: LID 0x10 QPN 0x004c PSN 0xcbaa1e RKey 0x1fff00 VAddr 0x007f0b884ee000
---------------------------------------------------------------------------------------
 #bytes     #iterations    BW peak[MB/sec]    BW average[MB/sec]   MsgRate[Mpps]
 65536      2000000          0.00               46539.94                   0.744639
---------------------------------------------------------------------------------------

But it fails when using any larger size:

root@h100-ubuntu:~/perftest# ./ib_write_bw -d mlx5_1  -s 65537 10.160.11.0 -n 2000000
 WARNING: BW peak won't be measured in this run.
---------------------------------------------------------------------------------------
                    RDMA_Write BW Test
 Dual-port       : OFF          Device         : mlx5_1
 Number of qps   : 1            Transport type : IB
 Connection type : RC           Using SRQ      : OFF
 PCIe relax order: ON
 ibv_wr* API     : ON
 TX depth        : 128
 CQ Moderation   : 1
 Mtu             : 4096[B]
 Link type       : IB
 Max inline data : 0[B]
 rdma_cm QPs     : OFF
 Data ex. method : Ethernet
---------------------------------------------------------------------------------------
 local address: LID 0x12 QPN 0x004e PSN 0xf8e011 RKey 0x1fff00 VAddr 0x007fc144e36001
 remote address: LID 0x10 QPN 0x004d PSN 0x10176d RKey 0x1fff00 VAddr 0x007f0a5b1ff000
---------------------------------------------------------------------------------------
 #bytes     #iterations    BW peak[MB/sec]    BW average[MB/sec]   MsgRate[Mpps]
 Completion with error at client
 Failed status 10: wr_id 0 syndrom 0x88
scnt=128, ccnt=0
 Failed to complete run_iter_bw function successfully

and the following error appears in dmesg:
{code}
[ 1212.370260] mlx5_0/1: QP 77 error: local protection error (0x3b 0x0 0x9d)
{code}

Environment:
Hypervisor: stock Ubuntu 22.04
linux compute-19 5.15.0-92-generic #102-Ubuntu SMP Wed Jan 10 09:33:48 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux

 /usr/libexec/qemu-kvm -name guest=instance-0000179c,debug-threads=on -S -object {"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-8-instance-0000179c/master-key.aes"} -machine pc-q35-rhel9.2.0,usb=off,dump-guest-core=off,hpet=off,acpi=on -accel kvm -cpu host,migratable=on -m size=1073741824k -overcommit mem-lock=off -smp 64,sockets=2,dies=1,cores=32,threads=1 -object {"qom-type":"memory-backend-ram","id":"ram-node0","size":549755813888,"host-nodes":[0],"policy":"bind"} -numa node,nodeid=0,cpus=0-31,memdev=ram-node0 -object {"qom-type":"memory-backend-ram","id":"ram-node1","size":549755813888,"host-nodes":[1],"policy":"bind"} -numa node,nodeid=1,cpus=32-63,memdev=ram-node1 -uuid bedc3bd5-cee6-4929-85e3-61bb6f33d4b0 -smbios type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,version=27.2.1,serial=bedc3bd5-cee6-4929-85e3-61bb6f33d4b0,uuid=bedc3bd5-cee6-4929-85e3-61bb6f33d4b0,family=Virtual Machine -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=30,server=on,wait=off -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-shutdown -boot strict=on -device {"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"} -device {"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"} -device {"driver":"pcie-root-port","port":18,"chassis":3,"id":"pci.3","bus":"pcie.0","addr":"0x2.0x2"} -device {"driver":"pcie-root-port","port":19,"chassis":4,"id":"pci.4","bus":"pcie.0","addr":"0x2.0x3"} -device {"driver":"pcie-root-port","port":20,"chassis":5,"id":"pci.5","bus":"pcie.0","addr":"0x2.0x4"} -device {"driver":"pcie-root-port","port":21,"chassis":6,"id":"pci.6","bus":"pcie.0","addr":"0x2.0x5"} -device {"driver":"pcie-root-port","port":22,"chassis":7,"id":"pci.7","bus":"pcie.0","addr":"0x2.0x6"} -device 
{"driver":"pcie-root-port","port":23,"chassis":8,"id":"pci.8","bus":"pcie.0","addr":"0x2.0x7"} -device {"driver":"pcie-root-port","port":24,"chassis":9,"id":"pci.9","bus":"pcie.0","multifunction":true,"addr":"0x3"} -device {"driver":"pcie-root-port","port":25,"chassis":10,"id":"pci.10","bus":"pcie.0","addr":"0x3.0x1"} -device {"driver":"pcie-root-port","port":26,"chassis":11,"id":"pci.11","bus":"pcie.0","addr":"0x3.0x2"} -device {"driver":"pcie-root-port","port":27,"chassis":12,"id":"pci.12","bus":"pcie.0","addr":"0x3.0x3"} -device {"driver":"pcie-root-port","port":28,"chassis":13,"id":"pci.13","bus":"pcie.0","addr":"0x3.0x4"} -device {"driver":"pcie-root-port","port":29,"chassis":14,"id":"pci.14","bus":"pcie.0","addr":"0x3.0x5"} -device {"driver":"pcie-root-port","port":30,"chassis":15,"id":"pci.15","bus":"pcie.0","addr":"0x3.0x6"} -device {"driver":"pcie-root-port","port":31,"chassis":16,"id":"pci.16","bus":"pcie.0","addr":"0x3.0x7"} -device {"driver":"pcie-root-port","port":32,"chassis":17,"id":"pci.17","bus":"pcie.0","multifunction":true,"addr":"0x4"} -device {"driver":"pcie-pci-bridge","id":"pci.18","bus":"pci.1","addr":"0x0"} -device {"driver":"pcie-root-port","port":33,"chassis":19,"id":"pci.19","bus":"pcie.0","addr":"0x4.0x1"} -device {"driver":"pcie-root-port","port":34,"chassis":20,"id":"pci.20","bus":"pcie.0","addr":"0x4.0x2"} -device {"driver":"pcie-root-port","port":35,"chassis":21,"id":"pci.21","bus":"pcie.0","addr":"0x4.0x3"} -device {"driver":"pcie-root-port","port":36,"chassis":22,"id":"pci.22","bus":"pcie.0","addr":"0x4.0x4"} -device {"driver":"pcie-root-port","port":37,"chassis":23,"id":"pci.23","bus":"pcie.0","addr":"0x4.0x5"} -device {"driver":"pcie-root-port","port":38,"chassis":24,"id":"pci.24","bus":"pcie.0","addr":"0x4.0x6"} -device {"driver":"pcie-root-port","port":39,"chassis":25,"id":"pci.25","bus":"pcie.0","addr":"0x4.0x7"} -device 
{"driver":"pcie-root-port","port":40,"chassis":26,"id":"pci.26","bus":"pcie.0","multifunction":true,"addr":"0x5"} -device {"driver":"pcie-root-port","port":41,"chassis":27,"id":"pci.27","bus":"pcie.0","addr":"0x5.0x1"} -device {"driver":"pcie-root-port","port":42,"chassis":28,"id":"pci.28","bus":"pcie.0","addr":"0x5.0x2"} -device {"driver":"piix3-usb-uhci","id":"usb","bus":"pci.18","addr":"0x1"} -blockdev {"driver":"file","filename":"/var/lib/nova/mnt/00970d24d89c3c2cc8741169b8ce041f/volume","aio":"native","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"raw","file":"libvirt-1-storage"} -device {"driver":"virtio-blk-pci","bus":"pci.3","addr":"0x0","drive":"libvirt-1-format","id":"virtio-disk0","bootindex":1,"write-cache":"on","serial":"f6c17591-4baa-41a7-8f77-651c0f1225e9"} -netdev {"type":"tap","fd":"33","vhost":true,"vhostfd":"34","id":"hostnet0"} -device {"driver":"virtio-net-pci","host_mtu":9000,"netdev":"hostnet0","id":"net0","mac":"fa:16:3e:ea:ef:9b","bus":"pci.2","addr":"0x0"} -add-fd set=0,fd=29,opaque=serial0-log -chardev pty,id=charserial0,logfile=/dev/fdset/0,logappend=on -device {"driver":"isa-serial","chardev":"charserial0","id":"serial0","index":0} -device {"driver":"usb-tablet","id":"input0","bus":"usb.0","port":"1"} -audiodev {"id":"audio1","driver":"none"} -vnc 172.16.1.79:0,audiodev=audio1 -device {"driver":"virtio-vga","id":"video0","max_outputs":1,"bus":"pcie.0","addr":"0x1"} -global ICH9-LPC.noreboot=off -watchdog-action reset -device {"driver":"vfio-pci","host":"0000:04:00.0","id":"hostdev0","bus":"pci.4","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:23:00.0","id":"hostdev1","bus":"pci.5","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:43:00.0","id":"hostdev2","bus":"pci.6","addr":"0x0"} -device 
{"driver":"vfio-pci","host":"0000:64:00.0","id":"hostdev3","bus":"pci.7","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:84:00.0","id":"hostdev4","bus":"pci.8","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:a3:00.0","id":"hostdev5","bus":"pci.9","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:c3:00.0","id":"hostdev6","bus":"pci.10","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:e4:00.0","id":"hostdev7","bus":"pci.11","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:09:00.0","id":"hostdev8","bus":"pci.12","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:0a:00.0","id":"hostdev9","bus":"pci.13","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:0b:00.0","id":"hostdev10","bus":"pci.14","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:0c:00.0","id":"hostdev11","bus":"pci.15","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:45:00.0","id":"hostdev12","bus":"pci.16","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:65:00.0","id":"hostdev13","bus":"pci.17","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:03:00.0","id":"hostdev14","bus":"pci.19","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:24:00.0","id":"hostdev15","bus":"pci.20","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:44:00.0","id":"hostdev16","bus":"pci.21","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:63:00.0","id":"hostdev17","bus":"pci.22","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:83:00.0","id":"hostdev18","bus":"pci.23","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:a4:00.0","id":"hostdev19","bus":"pci.24","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:c4:00.0","id":"hostdev20","bus":"pci.25","addr":"0x0"} -device {"driver":"vfio-pci","host":"0000:e3:00.0","id":"hostdev21","bus":"pci.26","addr":"0x0"} -device {"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.27","addr":"0x0"} -object {"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"} -device 
{"driver":"virtio-rng-pci","rng":"objrng0","id":"rng0","bus":"pci.28","addr":"0x0"} -device {"driver":"vmcoreinfo"} -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
root@compute-19:/var/log/kolla/nova# modinfo kvm
filename:       /lib/modules/5.15.0-92-generic/kernel/arch/x86/kvm/kvm.ko
license:        GPL
author:         Qumranet
srcversion:     0F3FBE4863EEB2F24491990
depends:
retpoline:      Y
intree:         Y
name:           kvm
vermagic:       5.15.0-92-generic SMP mod_unload modversions
sig_id:         PKCS#7
signer:         Build time autogenerated kernel key
sig_key:        73:38:55:0D:10:EE:16:04:61:8F:B7:0F:DC:2B:3E:9B:37:5C:34:5F
sig_hashalgo:   sha512
root@compute-19:/var/log/kolla/nova# modinfo vfio
name:           vfio
filename:       (builtin)
softdep:        post: vfio_iommu_type1 vfio_iommu_spapr_tce
alias:          devname:vfio/vfio
alias:          char-major-10-196
description:    VFIO - User Level meta-driver
author:         Alex Williamson <alex.williamson@redhat.com>
license:        GPL v2
file:           drivers/vfio/vfio
version:        0.3
parm:           enable_unsafe_noiommu_mode:Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false) (bool)
BOOT_IMAGE=/boot/vmlinuz-5.15.0-92-generic root=/dev/mapper/vg0-lv--0 ro amd_iommu=on iommu=pt

Virtual machine:
Ubuntu 22.04 with MLNX_OFED_LINUX-23.10-1.1.9.0-ubuntu22.04-x86_64

root@h100-ubuntu:~# ethtool -i ibp19s0
driver: mlx5_core[ib_ipoib]
version: 23.10-1.1.9
firmware-version: 28.36.1010 (MT_0000000838)
expansion-rom-version:
bus-info: 0000:13:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes

perftest: GitHub - linux-rdma/perftest: Infiniband Verbs Performance Tests - commit afe39a9b857440123af047659c2a337ef3107acc (HEAD → master, origin/master, origin/HEAD)

Dear customer,

Here are our suggestions:

  • Install the MLNX_OFED driver in the VM and run the test again using the perftest tools that ship with the driver;
  • Upgrade the firmware to the latest version.

Thanks