NVMe Driver not registered with nvidia-fs - GDS NVMe unsupported on Rocky 8.6

@faraz.ahmed, can you try changing compatibility mode to false in cufile.json and then run gdscheck.py again?

@kmodukuri, can you please review the logs below? There is a problem with the kernel modules when inserting nvidia-fs.ko.

[  620.034971] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.034977] nvidia_fs: Unknown symbol nvidia_p2p_dma_unmap_pages (err -2)
[  620.035013] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.035015] nvidia_fs: Unknown symbol nvidia_p2p_get_pages (err -2)
[  620.035028] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.035029] nvidia_fs: Unknown symbol nvidia_p2p_put_pages (err -2)
[  620.035037] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.035039] nvidia_fs: Unknown symbol nvidia_p2p_dma_map_pages (err -2)
[  620.035049] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.035050] nvidia_fs: Unknown symbol nvidia_p2p_free_dma_mapping (err -2)
[  620.035052] nvidia_fs: module using GPL-only symbols uses symbols from proprietary module nvidia.
[  620.035053] nvidia_fs: Unknown symbol nvidia_p2p_free_page_table (err -2)
[root@gds src]#
[root@gds src]#
[root@gds src]# uname -r
5.14.0-284.30.1.el9_2.x86_64
[root@gds src]#
[root@gds src]# cat /etc/os-release
NAME="Rocky Linux"
VERSION="9.2 (Blue Onyx)"
ID="rocky"
ID_LIKE="rhel centos fedora"
VERSION_ID="9.2"
PLATFORM_ID="platform:el9"
PRETTY_NAME="Rocky Linux 9.2 (Blue Onyx)"
ANSI_COLOR="0;32"
LOGO="fedora-logo-icon"
CPE_NAME="cpe:/o:rocky:rocky:9::baseos"
HOME_URL="https://rockylinux.org/"
BUG_REPORT_URL="https://bugs.rockylinux.org/"
SUPPORT_END="2032-05-31"
ROCKY_SUPPORT_PRODUCT="Rocky-Linux-9"
ROCKY_SUPPORT_PRODUCT_VERSION="9.2"
REDHAT_SUPPORT_PRODUCT="Rocky Linux"
REDHAT_SUPPORT_PRODUCT_VERSION="9.2"
[root@gds src]#
[root@gds src]# /usr/local/cuda-12.2/gds/tools/gdscheck.py -p
 Platform verification error :
nvidia-fs driver is not loaded

[root@gds src]# ll
total 72888
-rw-r--r--. 1 root root      578 Nov 20 17:14 config-host.h
-rwxr-xr-x. 1 root root     9867 Nov 20 17:13 configure
-rwxr-xr-x. 1 root root     4203 Nov 20 17:13 create_nv.symvers.sh
-rw-r--r--. 1 root root       99 Nov 20 17:25 cufile.log
-rw-r--r--. 1 root root     2621 Nov 20 17:13 dkms.conf
-rw-r--r--. 1 root root        9 Nov 20 17:13 GDS_VERSION
-rw-r--r--. 1 root root     4792 Nov 20 17:13 Makefile
-rw-r--r--. 1 root root       37 Nov 20 17:14 modules.order
-rw-r--r--. 1 root root        0 Nov 20 17:14 Module.symvers
-rw-r--r--. 1 root root     6413 Nov 20 17:13 nvfs-batch.c
-rw-r--r--. 1 root root     1600 Nov 20 17:13 nvfs-batch.h
-rw-r--r--. 1 root root   254368 Nov 20 17:14 nvfs-batch.o
-rw-r--r--. 1 root root    77409 Nov 20 17:13 nvfs-core.c
-rw-r--r--. 1 root root     9851 Nov 20 17:13 nvfs-core.h
-rw-r--r--. 1 root root   668328 Nov 20 17:14 nvfs-core.o
-rw-r--r--. 1 root root    27701 Nov 20 17:13 nvfs-dma.c
-rw-r--r--. 1 root root     5727 Nov 20 17:13 nvfs-dma.h
-rw-r--r--. 1 root root   303584 Nov 20 17:14 nvfs-dma.o
-rw-r--r--. 1 root root     3018 Nov 20 17:13 nvfs-fault.c
-rw-r--r--. 1 root root     1894 Nov 20 17:13 nvfs-fault.h
-rw-r--r--. 1 root root     1942 Nov 20 17:13 nvfs-kernel-interface.c
-rw-r--r--. 1 root root     1818 Nov 20 17:13 nvfs-kernel-interface.h
-rw-r--r--. 1 root root     9984 Nov 20 17:14 nvfs-kernel-interface.o
-rw-r--r--. 1 root root    48234 Nov 20 17:13 nvfs-mmap.c
-rw-r--r--. 1 root root     9424 Nov 20 17:13 nvfs-mmap.h
-rw-r--r--. 1 root root   486264 Nov 20 17:14 nvfs-mmap.o
-rw-r--r--. 1 root root     4122 Nov 20 17:13 nvfs-mod.c
-rw-r--r--. 1 root root   198576 Nov 20 17:14 nvfs-mod.o
-rw-r--r--. 1 root root     1593 Nov 20 17:13 nvfs-p2p.h
-rw-r--r--. 1 root root    27016 Nov 20 17:13 nvfs-pci.c
-rw-r--r--. 1 root root     8041 Nov 20 17:13 nvfs-pci.h
-rw-r--r--. 1 root root   336624 Nov 20 17:14 nvfs-pci.o
-rw-r--r--. 1 root root     7683 Nov 20 17:13 nvfs-proc.c
-rw-r--r--. 1 root root   242088 Nov 20 17:14 nvfs-proc.o
-rw-r--r--. 1 root root     8016 Nov 20 17:13 nvfs-rdma.c
-rw-r--r--. 1 root root     1622 Nov 20 17:13 nvfs-rdma.h
-rw-r--r--. 1 root root   212240 Nov 20 17:14 nvfs-rdma.o
-rw-r--r--. 1 root root    21780 Nov 20 17:13 nvfs-stat.c
-rw-r--r--. 1 root root     7284 Nov 20 17:13 nvfs-stat.h
-rw-r--r--. 1 root root   357568 Nov 20 17:14 nvfs-stat.o
-rw-r--r--. 1 root root     1810 Nov 20 17:13 nvfs-vers.h
-rw-r--r--. 1 root root  3080992 Nov 20 17:14 nvidia-fs.ko
-rw-r--r--. 1 root root      371 Nov 20 17:14 nvidia-fs.mod
-rw-r--r--. 1 root root     3856 Nov 20 17:14 nvidia-fs.mod.c
-rw-r--r--. 1 root root    58056 Nov 20 17:14 nvidia-fs.mod.o
-rw-r--r--. 1 root root  3025432 Nov 20 17:14 nvidia-fs.o
-rw-r--r--. 1 root root 64981949 Nov 20 17:14 nvidia.ko
-rw-r--r--. 1 root root      968 Nov 20 17:14 nv.symvers
[root@gds src]# insmod nvidia-fs.ko
insmod: ERROR: could not insert module nvidia-fs.ko: Unknown symbol in module

Starting from nvidia-fs version 2.17.5, GDS is only supported with the open version of the nvidia.ko driver, due to GPL restrictions on using proprietary modules. Please install the NVIDIA open RM driver.

Starting with CUDA toolkit 12.2.2, GDS kernel driver package nvidia-gds version 12.2.2-1 (provided by nvidia-fs-dkms 2.17.5-1) and above is only supported with the NVIDIA open kernel driver. Follow the instructions in Removing CUDA Toolkit and Driver to remove existing NVIDIA driver packages and then follow instructions in NVIDIA Open GPU Kernel Modules to install NVIDIA open kernel driver packages.

@kmodukuri, thanks for the update. Installing the open driver solved the issue with properties.use_compat_mode : false, but NVMe is still shown as Unsupported.

What should I do to make it supported?

[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdscheck.py -p
 GDS release version: 1.8.1.2
 nvidia_fs version:  2.18 libcufile version: 2.12
 Platform: x86_64
 ============
 ENVIRONMENT:
 ============
 =====================
 DRIVER CONFIGURATION:
 =====================
 NVMe               : Unsupported
 NVMeOF             : Unsupported
 SCSI               : Unsupported
 ScaleFlux CSD      : Unsupported
 NVMesh             : Unsupported
 DDN EXAScaler      : Unsupported
 IBM Spectrum Scale : Unsupported
 NFS                : Unsupported
 BeeGFS             : Unsupported
 WekaFS             : Unsupported
 Userspace RDMA     : Unsupported
 --Mellanox PeerDirect : Disabled
 --rdma library        : Not Loaded (libcufile_rdma.so)
 --rdma devices        : Not configured
 --rdma_device_status  : Up: 0 Down: 0
 =====================
 CUFILE CONFIGURATION:
 =====================
 properties.use_compat_mode : false
 properties.force_compat_mode : false
 properties.gds_rdma_write_support : true
 properties.use_poll_mode : false
 properties.poll_mode_max_size_kb : 4
 properties.max_batch_io_size : 128
 properties.max_batch_io_timeout_msecs : 5
 properties.max_direct_io_size_kb : 16384
 properties.max_device_cache_size_kb : 131072
 properties.max_device_pinned_mem_size_kb : 33554432
 properties.posix_pool_slab_size_kb : 4 1024 16384
 properties.posix_pool_slab_count : 128 64 32
 properties.rdma_peer_affinity_policy : RoundRobin
 properties.rdma_dynamic_routing : 0
 fs.generic.posix_unaligned_writes : false
 fs.lustre.posix_gds_min_kb: 0
 fs.beegfs.posix_gds_min_kb: 0
 fs.weka.rdma_write_support: false
 fs.gpfs.gds_write_support: false
 profile.nvtx : false
 profile.cufile_stats : 0
 miscellaneous.api_check_aggressive : false
 execution.max_io_threads : 4
 execution.max_io_queue_depth : 128
 execution.parallel_io : true
 execution.min_io_threshold_size_kb : 8192
 execution.max_request_parallelism : 4
 properties.force_odirect_mode : false
 properties.prefer_iouring : false
 =========
 GPU INFO:
 =========
 GPU index 0 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 1 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 2 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 3 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 ==============
 PLATFORM INFO:
 ==============
 IOMMU: disabled
 Nvidia Driver Info Status: Supported(Nvidia Open Driver Installed)
 Cuda Driver Version Installed:  12030
 Platform: PowerEdge R750xa, Arch: x86_64(Linux 5.14.0-284.30.1.el9_2.x86_64)
 Platform verification succeeded

@kmodukuri @faraz.ahmed
I am not able to verify GDS.

[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdscheck.py -p
 GDS release version: 1.8.1.2
 nvidia_fs version:  2.18 libcufile version: 2.12
 Platform: x86_64
 ============
 ENVIRONMENT:
 ============
 =====================
 DRIVER CONFIGURATION:
 =====================
 NVMe               : Unsupported
 NVMeOF             : Unsupported
 SCSI               : Unsupported
 ScaleFlux CSD      : Unsupported
 NVMesh             : Unsupported
 DDN EXAScaler      : Unsupported
 IBM Spectrum Scale : Unsupported
 NFS                : Unsupported
 BeeGFS             : Unsupported
 WekaFS             : Unsupported
 Userspace RDMA     : Unsupported
 --Mellanox PeerDirect : Disabled
 --rdma library        : Not Loaded (libcufile_rdma.so)
 --rdma devices        : Not configured
 --rdma_device_status  : Up: 0 Down: 0
 =====================
 CUFILE CONFIGURATION:
 =====================
 properties.use_compat_mode : false
 properties.force_compat_mode : false
 properties.gds_rdma_write_support : true
 properties.use_poll_mode : false
 properties.poll_mode_max_size_kb : 4
 properties.max_batch_io_size : 128
 properties.max_batch_io_timeout_msecs : 5
 properties.max_direct_io_size_kb : 16384
 properties.max_device_cache_size_kb : 131072
 properties.max_device_pinned_mem_size_kb : 33554432
 properties.posix_pool_slab_size_kb : 4 1024 16384
 properties.posix_pool_slab_count : 128 64 32
 properties.rdma_peer_affinity_policy : RoundRobin
 properties.rdma_dynamic_routing : 0
 fs.generic.posix_unaligned_writes : false
 fs.lustre.posix_gds_min_kb: 0
 fs.beegfs.posix_gds_min_kb: 0
 fs.weka.rdma_write_support: false
 fs.gpfs.gds_write_support: false
 profile.nvtx : false
 profile.cufile_stats : 0
 miscellaneous.api_check_aggressive : false
 execution.max_io_threads : 4
 execution.max_io_queue_depth : 128
 execution.parallel_io : true
 execution.min_io_threshold_size_kb : 8192
 execution.max_request_parallelism : 4
 properties.force_odirect_mode : false
 properties.prefer_iouring : false
 =========
 GPU INFO:
 =========
 GPU index 0 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 1 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 2 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 3 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 ==============
 PLATFORM INFO:
 ==============
 IOMMU: disabled
 Nvidia Driver Info Status: Supported(Nvidia Open Driver Installed)
 Cuda Driver Version Installed:  12030
 Platform: PowerEdge R750xa, Arch: x86_64(Linux 5.14.0-284.30.1.el9_2.x86_64)
 Platform verification succeeded

[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdsio_verify -f /mnt/gds/gdstest -n 1 -m 0 -s 1024 -o 0  -d 0 -t 0 -S -g 4K
file register error: GPUDirect Storage not supported on current file

Cufile.log

 21-11-2023 17:11:04:156 [pid=145168 tid=145168] ERROR  cufio-fs:199 NVMe Driver not registered with nvidia-fs!!!
 21-11-2023 17:11:04:156 [pid=145168 tid=145168] ERROR  cufio-fs:199 NVMe Driver not registered with nvidia-fs!!!
 21-11-2023 17:11:04:156 [pid=145168 tid=145168] NOTICE  cufio-fs:441 dumping volume attributes: DEVNAME:/dev/nvme1n1,ID_FS_TYPE:ext4,ID_FS_USAGE:filesystem,UDEV_PCI_BRIDGE:0000:30:03.0,device/transport:pcie,ext4_journal_mode:ordered,fsid:f0578b196d5913c20x,numa_node:0,queue/logical_block_size:4096,wwid:eui.36344830526001490025384500000001,
 21-11-2023 17:11:04:156 [pid=145168 tid=145168] ERROR  cufio:297 cuFileHandleRegister error, file checks failed
 21-11-2023 17:11:04:156 [pid=145168 tid=145168] ERROR  cufio:339 cuFileHandleRegister error: GPUDirect Storage not supported on current file

@karanveersingh5623, can you please check your setup for a proper installation of MOFED? It should be installed with:

sudo ./mlnxofedinstall --with-nvmf --with-nfsrdma --enable-gds --add-kernel-support --dkms

Then update the initramfs:

UBUNTU:
$ sudo update-initramfs -u -k `uname -r`
$ sudo reboot

Red Hat/CentOS:
$ sudo dracut -f
$ sudo reboot

After the reboot, check the path of nvme.ko with:

$ modinfo nvme

$ objdump -t /lib/modules/4.18.0-477.27.1.el8_8.x86_64/extra/mlnx-nvme/host/nvme.ko | grep nvme_nvfs

If the installation went correctly, you should see symbols related to the nvme_nvfs patch, for example:

0000000000000050 l F .text 00000000000000ab nvme_nvfs_unmap_data
0000000000002000 l F .text 000000000000026e nvme_nvfs_map_data.constprop.0
00000000000002e3 l F .text.unlikely 000000000000003f nvme_nvfs_map_data.constprop.0.cold
0000000000000040 l F .text 0000000000000010 __pfx_nvme_nvfs_unmap_data
0000000000001ff0 l F .text 0000000000000010 __pfx_nvme_nvfs_map_data.constprop.0

If the installation still did not patch nvme.ko,
please share the version of the MOFED package you are using on this setup.

@kmodukuri, thanks for your support. NVMe now shows as supported, but I get an error when writing data to the NVMe drive. Please check the logs below.

[root@gds ~]# mkfs.ext4 /dev/nvme1n1
mke2fs 1.46.5 (30-Dec-2021)
/dev/nvme1n1 contains a ext4 file system
        last mounted on /mnt/gds on Thu Nov 23 14:12:03 2023
Proceed anyway? (y,N) y
Discarding device blocks: done
Creating filesystem with 937684566 4k blocks and 234422272 inodes
Filesystem UUID: 3ae00634-089d-41eb-9cfe-6d23ab4d6a90
Superblock backups stored on blocks:
        32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208,
        4096000, 7962624, 11239424, 20480000, 23887872, 71663616, 78675968,
        102400000, 214990848, 512000000, 550731776, 644972544

Allocating group tables: done
Writing inode tables: done
Creating journal (262144 blocks): done
Writing superblocks and filesystem accounting information: done

[root@gds ~]# mount -o data=ordered /dev/nvme1n1 /mnt/gds/
[root@gds ~]# lsblk
NAME        MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
nvme1n1     259:0    0  3.5T  0 disk /mnt/gds
nvme0n1     259:1    0  3.5T  0 disk
├─nvme0n1p1 259:2    0  600M  0 part /boot/efi
├─nvme0n1p2 259:3    0    1G  0 part /boot
├─nvme0n1p3 259:4    0  100G  0 part /home
├─nvme0n1p4 259:5    0    4G  0 part [SWAP]
└─nvme0n1p5 259:6    0  3.4T  0 part /
[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdscheck.py -p
 GDS release version: 1.8.1.2
 nvidia_fs version:  2.18 libcufile version: 2.12
 Platform: x86_64
 ============
 ENVIRONMENT:
 ============
 =====================
 DRIVER CONFIGURATION:
 =====================
 NVMe               : Supported
 NVMeOF             : Unsupported
 SCSI               : Unsupported
 ScaleFlux CSD      : Unsupported
 NVMesh             : Unsupported
 DDN EXAScaler      : Unsupported
 IBM Spectrum Scale : Unsupported
 NFS                : Unsupported
 BeeGFS             : Unsupported
 WekaFS             : Unsupported
 Userspace RDMA     : Unsupported
 --Mellanox PeerDirect : Disabled
 --rdma library        : Not Loaded (libcufile_rdma.so)
 --rdma devices        : Not configured
 --rdma_device_status  : Up: 0 Down: 0
 =====================
 CUFILE CONFIGURATION:
 =====================
 properties.use_compat_mode : false
 properties.force_compat_mode : false
 properties.gds_rdma_write_support : true
 properties.use_poll_mode : false
 properties.poll_mode_max_size_kb : 4
 properties.max_batch_io_size : 128
 properties.max_batch_io_timeout_msecs : 5
 properties.max_direct_io_size_kb : 16384
 properties.max_device_cache_size_kb : 131072
 properties.max_device_pinned_mem_size_kb : 33554432
 properties.posix_pool_slab_size_kb : 4 1024 16384
 properties.posix_pool_slab_count : 128 64 32
 properties.rdma_peer_affinity_policy : RoundRobin
 properties.rdma_dynamic_routing : 0
 fs.generic.posix_unaligned_writes : false
 fs.lustre.posix_gds_min_kb: 0
 fs.beegfs.posix_gds_min_kb: 0
 fs.weka.rdma_write_support: false
 fs.gpfs.gds_write_support: false
 profile.nvtx : false
 profile.cufile_stats : 0
 miscellaneous.api_check_aggressive : false
 execution.max_io_threads : 4
 execution.max_io_queue_depth : 128
 execution.parallel_io : true
 execution.min_io_threshold_size_kb : 8192
 execution.max_request_parallelism : 4
 properties.force_odirect_mode : false
 properties.prefer_iouring : false
 =========
 GPU INFO:
 =========
 GPU index 0 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 1 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 2 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 3 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 ==============
 PLATFORM INFO:
 ==============
 IOMMU: disabled
 Nvidia Driver Info Status: Supported(Nvidia Open Driver Installed)
 Cuda Driver Version Installed:  12030
 Platform: PowerEdge R750xa, Arch: x86_64(Linux 5.14.0-284.30.1.el9_2.x86_64)
 Platform verification succeeded

[root@gds ~]# mount | grep /mnt/gds/
[root@gds ~]# mount | grep /mnt/
/dev/nvme1n1 on /mnt/gds type ext4 (rw,relatime,seclabel,stripe=32,data=ordered)
[root@gds ~]#
[root@gds ~]#
[root@gds ~]#
[root@gds ~]# vim /mnt/gds/gdstest
[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdsio_verify -f /mnt/gds/gdstest -n 1 -m 0 -s 1024 -o 0  -d 0 -t 0 -S -g 4K
gpu index :0,file :/mnt/gds/gdstest, gpu buffer alignment :4096, gpu buffer offset :0, gpu devptr offset :0, file offset :0, io_requested :1024, io_chunk_size :1024, bufregister :false, sync :0, nr ios :1,
fsync :0,
Batch mode: 0
Pread . Read 4 bytes. requested 1024 bytes
Pread . Read 4 bytes. requested 1024 bytes
Data Verification Success
[root@gds ~]#
[root@gds ~]#
[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdsio -f /mnt/gds/test -d 0 -w 4 -s 1M -x 0 -i 4K:32K:1K -I 3
write io failed of type 1 size: 9216 , ret: 0
failed to submit io of type 1 ret: -5
Error: IO failed stopping traffic, fd :115 ret:-5 errno :5
io failed :ret :-5 errno :5, file offset :270336, block size  :9216
write io failed of type 1 size: 9216 , ret: 0
failed to submit io of type 1 ret: -5
Error: IO failed stopping traffic, fd :115 ret:-5 errno :5
io failed :ret :-5 errno :5, file offset :794624, block size  :9216
write io failed of type 1 size: 9216 , ret: 0
failed to submit io of type 1 ret: -5
Error: IO failed stopping traffic, fd :115 ret:-5 errno :5
io failed :ret :-5 errno :5, file offset :8192, block size  :9216
write io failed of type 1 size: 9216 , ret: 0
failed to submit io of type 1 ret: -5
Error: IO failed stopping traffic, fd :115 ret:-5 errno :5
io failed :ret :-5 errno :5, file offset :532480, block size  :9216

Cufile.log

24-11-2023 15:38:57:722 [pid=166112 tid=166148] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:38:57:722 [pid=166112 tid=166148] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:38:57:722 [pid=166112 tid=166150] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 32768
 24-11-2023 15:38:57:722 [pid=166112 tid=166150] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:38:57:723 [pid=166112 tid=166147] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:38:57:723 [pid=166112 tid=166147] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:38:57:723 [pid=166112 tid=166149] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 32768
 24-11-2023 15:38:57:723 [pid=166112 tid=166149] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:43:40:840 [pid=166299 tid=166335] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:43:40:840 [pid=166299 tid=166335] ERROR  0:904 write failed at file_offset 274432 cur_size 8192 retval -5
 24-11-2023 15:43:40:841 [pid=166299 tid=166335] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:43:40:841 [pid=166299 tid=166337] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:43:40:841 [pid=166299 tid=166337] ERROR  0:904 write failed at file_offset 798720 cur_size 8192 retval -5
 24-11-2023 15:43:40:841 [pid=166299 tid=166337] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:43:40:841 [pid=166299 tid=166334] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:43:40:841 [pid=166299 tid=166334] ERROR  0:904 write failed at file_offset 12288 cur_size 8192 retval -5
 24-11-2023 15:43:40:841 [pid=166299 tid=166334] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5
 24-11-2023 15:43:40:842 [pid=166299 tid=166336] ERROR  0:1534 IOCTL failed io-type 1 ret -5 expected 8192 gpu_page_offset 0
 24-11-2023 15:43:40:842 [pid=166299 tid=166336] ERROR  0:904 write failed at file_offset 536576 cur_size 8192 retval -5
 24-11-2023 15:43:40:842 [pid=166299 tid=166336] ERROR  cufio_core:2392 could not perform unaligned writes for fd: 115 ret: -5

dmesg

[98780.064015] EXT4-fs (nvme1n1): unmounting filesystem.
[98855.075760] EXT4-fs (nvme1n1): mounted filesystem with ordered data mode. Quota mode: none.
[99097.712359] blk_print_req_error: 6 callbacks suppressed
[99097.712361] I/O error, dev nvme1n1, sector 33292816 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99097.712372] nvidia-fs:write IO failed :-5
[99097.712813] I/O error, dev nvme1n1, sector 33293840 op 0x1:(WRITE) flags 0x8800 phys_seg 1 prio class 2
[99097.712825] nvidia-fs:write IO failed :-5
[99097.713249] I/O error, dev nvme1n1, sector 33292304 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99097.713260] nvidia-fs:write IO failed :-5
[99097.713702] I/O error, dev nvme1n1, sector 33293328 op 0x1:(WRITE) flags 0x8800 phys_seg 1 prio class 2
[99097.713717] nvidia-fs:write IO failed :-5
[99380.827646] I/O error, dev nvme1n1, sector 33292824 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99380.827659] nvidia-fs:write IO failed :-5
[99380.828122] I/O error, dev nvme1n1, sector 33293848 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99380.828133] nvidia-fs:write IO failed :-5
[99380.828559] I/O error, dev nvme1n1, sector 33292312 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99380.828569] nvidia-fs:write IO failed :-5
[99380.829040] I/O error, dev nvme1n1, sector 33293336 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99380.829051] nvidia-fs:write IO failed :-5
[99638.376462] I/O error, dev nvme1n1, sector 33293848 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99638.376474] nvidia-fs:write IO failed :-5
[99638.376900] I/O error, dev nvme1n1, sector 33292312 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99638.376911] nvidia-fs:write IO failed :-5
[99638.377318] I/O error, dev nvme1n1, sector 33292824 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99638.377326] nvidia-fs:write IO failed :-5
[99638.377739] I/O error, dev nvme1n1, sector 33293336 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99638.377753] nvidia-fs:write IO failed :-5

@kmodukuri, is there anything you can share about the above error? We are close to fixing the issue :)

Can you try a simple test with the following two commands?

/usr/local/cuda-12.3/gds/tools/gdsio_verify -f /mnt/gds/gdstest -n 1 -m 0 -s 4k -o 0 -d 0 -t 0

/usr/local/cuda-12.3/gds/tools/gdsio -f /mnt/gds/test -d 0 -w 1 -s 4k -x 0 -i 4K -I 1 -V

@kmodukuri, please see the requested output below.

[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdsio_verify -f /mnt/gds/gdstest -n 1 -m 0 -s 4k -o 0 -d 0 -t 0
gpu index :0,file :/mnt/gds/gdstest, gpu buffer alignment :0, gpu buffer offset :0, gpu devptr offset :0, file offset :0, io_requested :4096, io_chunk_size :4096, bufregister :true, sync :0, nr ios :1,
fsync :0,
Batch mode: 0
Pread . Read 4 bytes. requested 4096 bytes
Pread . Read 4 bytes. requested 4096 bytes
***FAIL Read md5sum Mismatch!!
Data Verification Failed
[root@gds ~]#
[root@gds ~]#
[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdsio -f /mnt/gds/test -d 0 -w 1 -s 4k -x 0 -i 4K -I 1 -V
write io failed of type 1 size: 4096 , ret: 0
failed to submit io of type 1 ret: -5
Error: IO failed stopping traffic, fd :115 ret:-5 errno :5
io failed :ret :-5 errno :5, file offset :0, block size  :4096

@kmodukuri, is there any update on the above?

@kmodukuri, is there anything that can be shared on the above issue? Please let me know if you need more information.

It is not clear from the errors why the NVMe DMA is failing. Can you please answer the questions below?

what drive is this ?

lspci -nn

What is the exact MOFED version?

ofed_info -s ?

Is this a local NVMe drive or an NVMeOF drive?

nvme list

Is IOMMU set to ON?
$ gdscheck -p

What does the PCIe topology look like?

lspci -tvvv ?

[root@gds ~]# lspci -nn
00:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
00:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
00:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
00:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
00:02.0 System peripheral [0880]: Intel Corporation Ice Lake MSM [8086:09a6]
00:02.1 System peripheral [0880]: Intel Corporation Ice Lake PMON MSM [8086:09a7]
00:02.4 Non-Essential Instrumentation [1300]: Intel Corporation Ice Lake NorthPeak [8086:3456] (rev 01)
00:11.0 Unassigned class [ff00]: Intel Corporation C620 Series Chipset Family MROM 0 [8086:a1ec] (rev 0a)
00:11.5 SATA controller [0106]: Intel Corporation C620 Series Chipset Family SSATA Controller [AHCI mode] [8086:a1d2] (rev 0a)
00:14.0 USB controller [0c03]: Intel Corporation C620 Series Chipset Family USB 3.0 xHCI Controller [8086:a1af] (rev 0a)
00:14.2 Signal processing controller [1180]: Intel Corporation C620 Series Chipset Family Thermal Subsystem [8086:a1b1] (rev 0a)
00:16.0 Communication controller [0780]: Intel Corporation C620 Series Chipset Family MEI Controller #1 [8086:a1ba] (rev 0a)
00:16.1 Communication controller [0780]: Intel Corporation C620 Series Chipset Family MEI Controller #2 [8086:a1bb] (rev 0a)
00:16.4 Communication controller [0780]: Intel Corporation C620 Series Chipset Family MEI Controller #3 [8086:a1be] (rev 0a)
00:17.0 SATA controller [0106]: Intel Corporation C620 Series Chipset Family SATA Controller [AHCI mode] [8086:a182] (rev 0a)
00:1c.0 PCI bridge [0604]: Intel Corporation C620 Series Chipset Family PCI Express Root Port #1 [8086:a190] (rev fa)
00:1c.4 PCI bridge [0604]: Intel Corporation C620 Series Chipset Family PCI Express Root Port #5 [8086:a194] (rev fa)
00:1c.5 PCI bridge [0604]: Intel Corporation C620 Series Chipset Family PCI Express Root Port #6 [8086:a195] (rev fa)
00:1d.0 PCI bridge [0604]: Intel Corporation C620 Series Chipset Family PCI Express Root Port #9 [8086:a198] (rev fa)
00:1f.0 ISA bridge [0601]: Intel Corporation Device [8086:a1cb] (rev 0a)
00:1f.2 Memory controller [0580]: Intel Corporation C620 Series Chipset Family Power Management Controller [8086:a1a1] (rev 0a)
00:1f.4 SMBus [0c05]: Intel Corporation C620 Series Chipset Family SMBus [8086:a1a3] (rev 0a)
00:1f.5 Serial bus controller [0c80]: Intel Corporation C620 Series Chipset Family SPI Controller [8086:a1a4] (rev 0a)
02:00.0 PCI bridge [0604]: PLDA PCI Express Bridge [1556:be00] (rev 02)
03:00.0 VGA compatible controller [0300]: Matrox Electronics Systems Ltd. Integrated Matrox G200eW3 Graphics Controller [102b:0536] (rev 04)
04:00.0 Ethernet controller [0200]: Broadcom Inc. and subsidiaries NetXtreme BCM5720 Gigabit Ethernet PCIe [14e4:165f]
04:00.1 Ethernet controller [0200]: Broadcom Inc. and subsidiaries NetXtreme BCM5720 Gigabit Ethernet PCIe [14e4:165f]
05:00.0 SATA controller [0106]: Marvell Technology Group Ltd. 88SE9230 PCIe 2.0 x2 4-port SATA 6 Gb/s RAID Controller [1b4b:9230] (rev 11)
16:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
16:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
16:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
16:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
16:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
17:00.0 3D controller [0302]: NVIDIA Corporation GA100 [A100 PCIe 80GB] [10de:20b5] (rev a1)
30:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
30:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
30:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
30:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
30:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
30:03.0 PCI bridge [0604]: Intel Corporation Device [8086:347b] (rev 04)
30:04.0 PCI bridge [0604]: Intel Corporation Device [8086:347c] (rev 04)
31:00.0 Non-Volatile memory controller [0108]: Samsung Electronics Co Ltd NVMe SSD Controller PM174X [144d:a826]
32:00.0 Non-Volatile memory controller [0108]: Samsung Electronics Co Ltd NVMe SSD Controller PM9A1/PM9A3/980PRO [144d:a80a]
33:00.0 Ethernet controller [0200]: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ [8086:1572] (rev 02)
33:00.1 Ethernet controller [0200]: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ [8086:1572] (rev 02)
33:00.2 Ethernet controller [0200]: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ [8086:1572] (rev 02)
33:00.3 Ethernet controller [0200]: Intel Corporation Ethernet Controller X710 for 10GbE SFP+ [8086:1572] (rev 02)
4a:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
4a:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
4a:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
4a:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
64:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
64:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
64:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
64:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
64:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
65:00.0 3D controller [0302]: NVIDIA Corporation GA100 [A100 PCIe 80GB] [10de:20b5] (rev a1)
7e:00.0 System peripheral [0880]: Intel Corporation Device [8086:3450]
7e:00.1 System peripheral [0880]: Intel Corporation Device [8086:3451]
7e:00.2 System peripheral [0880]: Intel Corporation Device [8086:3452]
7e:00.3 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
7e:00.5 System peripheral [0880]: Intel Corporation Device [8086:3455]
7e:02.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
7e:02.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
7e:02.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
7e:03.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
7e:03.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
7e:03.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
7e:04.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
7e:04.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
7e:04.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
7e:04.3 System peripheral [0880]: Intel Corporation Device [8086:3443]
7e:05.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
7e:05.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
7e:05.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
7e:06.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
7e:06.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
7e:06.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
7e:07.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
7e:07.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
7e:07.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
7e:0b.0 System peripheral [0880]: Intel Corporation Device [8086:3448]
7e:0b.1 System peripheral [0880]: Intel Corporation Device [8086:3448]
7e:0b.2 System peripheral [0880]: Intel Corporation Device [8086:344b]
7e:0c.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
7e:0d.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
7e:0e.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
7e:0f.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
7e:1a.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
7e:1b.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
7e:1c.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
7e:1d.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
7f:00.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:00.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:01.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:02.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:03.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:04.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
7f:0a.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0a.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0b.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0c.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0d.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:0e.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
7f:1d.0 System peripheral [0880]: Intel Corporation Device [8086:344f]
7f:1d.1 System peripheral [0880]: Intel Corporation Device [8086:3457]
7f:1e.0 System peripheral [0880]: Intel Corporation Device [8086:3458] (rev 06)
7f:1e.1 System peripheral [0880]: Intel Corporation Device [8086:3459] (rev 06)
7f:1e.2 System peripheral [0880]: Intel Corporation Device [8086:345a] (rev 06)
7f:1e.3 System peripheral [0880]: Intel Corporation Device [8086:345b] (rev 06)
7f:1e.4 System peripheral [0880]: Intel Corporation Device [8086:345c] (rev 06)
7f:1e.5 System peripheral [0880]: Intel Corporation Device [8086:345d] (rev 06)
7f:1e.6 System peripheral [0880]: Intel Corporation Device [8086:345e] (rev 06)
7f:1e.7 System peripheral [0880]: Intel Corporation Device [8086:345f] (rev 06)
80:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
80:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
80:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
80:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
80:02.0 System peripheral [0880]: Intel Corporation Ice Lake MSM [8086:09a6]
80:02.1 System peripheral [0880]: Intel Corporation Ice Lake PMON MSM [8086:09a7]
80:02.4 Non-Essential Instrumentation [1300]: Intel Corporation Ice Lake NorthPeak [8086:3456] (rev 01)
97:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
97:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
97:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
97:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
97:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
98:00.0 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017]
98:00.1 Ethernet controller [0200]: Mellanox Technologies MT27800 Family [ConnectX-5] [15b3:1017]
b0:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
b0:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
b0:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
b0:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
c9:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
c9:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
c9:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
c9:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
c9:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
ca:00.0 3D controller [0302]: NVIDIA Corporation GA100 [A100 PCIe 80GB] [10de:20b5] (rev a1)
e2:00.0 System peripheral [0880]: Intel Corporation Ice Lake Memory Map/VT-d [8086:09a2] (rev 04)
e2:00.1 System peripheral [0880]: Intel Corporation Ice Lake Mesh 2 PCIe [8086:09a4] (rev 04)
e2:00.2 System peripheral [0880]: Intel Corporation Ice Lake RAS [8086:09a3] (rev 04)
e2:00.4 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
e2:02.0 PCI bridge [0604]: Intel Corporation Device [8086:347a] (rev 04)
e3:00.0 3D controller [0302]: NVIDIA Corporation GA100 [A100 PCIe 80GB] [10de:20b5] (rev a1)
fe:00.0 System peripheral [0880]: Intel Corporation Device [8086:3450]
fe:00.1 System peripheral [0880]: Intel Corporation Device [8086:3451]
fe:00.2 System peripheral [0880]: Intel Corporation Device [8086:3452]
fe:00.3 Host bridge [0600]: Intel Corporation Ice Lake IEH [8086:0998]
fe:00.5 System peripheral [0880]: Intel Corporation Device [8086:3455]
fe:02.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
fe:02.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
fe:02.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
fe:03.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
fe:03.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
fe:03.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
fe:04.0 System peripheral [0880]: Intel Corporation Ice Lake UPI Misc [8086:3440]
fe:04.1 System peripheral [0880]: Intel Corporation Ice Lake UPI Link/Phy0 [8086:3441]
fe:04.2 System peripheral [0880]: Intel Corporation Device [8086:3442]
fe:04.3 System peripheral [0880]: Intel Corporation Device [8086:3443]
fe:05.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
fe:05.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
fe:05.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
fe:06.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
fe:06.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
fe:06.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
fe:07.0 System peripheral [0880]: Intel Corporation Device [8086:3445]
fe:07.1 System peripheral [0880]: Intel Corporation Device [8086:3446]
fe:07.2 System peripheral [0880]: Intel Corporation Device [8086:3447]
fe:0b.0 System peripheral [0880]: Intel Corporation Device [8086:3448]
fe:0b.1 System peripheral [0880]: Intel Corporation Device [8086:3448]
fe:0b.2 System peripheral [0880]: Intel Corporation Device [8086:344b]
fe:0c.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
fe:0d.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
fe:0e.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
fe:0f.0 Performance counters [1101]: Intel Corporation Device [8086:344a]
fe:1a.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
fe:1b.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
fe:1c.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
fe:1d.0 Performance counters [1101]: Intel Corporation Device [8086:2880]
ff:00.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:00.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:01.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:02.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:03.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.0 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.1 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.2 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.3 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.4 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.5 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.6 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:04.7 System peripheral [0880]: Intel Corporation Device [8086:344c]
ff:0a.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0a.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0b.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0c.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0d.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.0 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.1 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.2 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.3 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.4 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.5 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.6 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:0e.7 System peripheral [0880]: Intel Corporation Device [8086:344d]
ff:1d.0 System peripheral [0880]: Intel Corporation Device [8086:344f]
ff:1d.1 System peripheral [0880]: Intel Corporation Device [8086:3457]
ff:1e.0 System peripheral [0880]: Intel Corporation Device [8086:3458] (rev 06)
ff:1e.1 System peripheral [0880]: Intel Corporation Device [8086:3459] (rev 06)
ff:1e.2 System peripheral [0880]: Intel Corporation Device [8086:345a] (rev 06)
ff:1e.3 System peripheral [0880]: Intel Corporation Device [8086:345b] (rev 06)
ff:1e.4 System peripheral [0880]: Intel Corporation Device [8086:345c] (rev 06)
ff:1e.5 System peripheral [0880]: Intel Corporation Device [8086:345d] (rev 06)
ff:1e.6 System peripheral [0880]: Intel Corporation Device [8086:345e] (rev 06)
ff:1e.7 System peripheral [0880]: Intel Corporation Device [8086:345f] (rev 06)

This is a local NVMe drive.

[root@gds ~]# ofed_info -s
MLNX_OFED_LINUX-5.8-3.0.7.0:

IOMMU is off

[root@gds ~]# /usr/local/cuda-12.3/gds/tools/gdscheck.py -p
 GDS release version: 1.8.1.2
 nvidia_fs version:  2.18 libcufile version: 2.12
 Platform: x86_64
 ============
 ENVIRONMENT:
 ============
 =====================
 DRIVER CONFIGURATION:
 =====================
 NVMe               : Supported
 NVMeOF             : Unsupported
 SCSI               : Unsupported
 ScaleFlux CSD      : Unsupported
 NVMesh             : Unsupported
 DDN EXAScaler      : Unsupported
 IBM Spectrum Scale : Unsupported
 NFS                : Unsupported
 BeeGFS             : Unsupported
 WekaFS             : Unsupported
 Userspace RDMA     : Unsupported
 --Mellanox PeerDirect : Disabled
 --rdma library        : Not Loaded (libcufile_rdma.so)
 --rdma devices        : Not configured
 --rdma_device_status  : Up: 0 Down: 0
 =====================
 CUFILE CONFIGURATION:
 =====================
 properties.use_compat_mode : false
 properties.force_compat_mode : false
 properties.gds_rdma_write_support : true
 properties.use_poll_mode : false
 properties.poll_mode_max_size_kb : 4
 properties.max_batch_io_size : 128
 properties.max_batch_io_timeout_msecs : 5
 properties.max_direct_io_size_kb : 16384
 properties.max_device_cache_size_kb : 131072
 properties.max_device_pinned_mem_size_kb : 33554432
 properties.posix_pool_slab_size_kb : 4 1024 16384
 properties.posix_pool_slab_count : 128 64 32
 properties.rdma_peer_affinity_policy : RoundRobin
 properties.rdma_dynamic_routing : 0
 fs.generic.posix_unaligned_writes : false
 fs.lustre.posix_gds_min_kb: 0
 fs.beegfs.posix_gds_min_kb: 0
 fs.weka.rdma_write_support: false
 fs.gpfs.gds_write_support: false
 profile.nvtx : false
 profile.cufile_stats : 0
 miscellaneous.api_check_aggressive : false
 execution.max_io_threads : 4
 execution.max_io_queue_depth : 128
 execution.parallel_io : true
 execution.min_io_threshold_size_kb : 1024
 execution.max_request_parallelism : 4
 properties.force_odirect_mode : false
 properties.prefer_iouring : false
 =========
 GPU INFO:
 =========
 GPU index 0 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 1 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 2 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 GPU index 3 NVIDIA A100 80GB PCIe bar:1 bar size (MiB):131072 supports GDS, IOMMU State: Disabled
 ==============
 PLATFORM INFO:
 ==============
 IOMMU: disabled
 Nvidia Driver Info Status: Supported(Nvidia Open Driver Installed)
 Cuda Driver Version Installed:  12030
 Platform: PowerEdge R750xa, Arch: x86_64(Linux 5.14.0-284.30.1.el9_2.x86_64)
 Platform verification succeeded

The GDS mount is on this NVMe SSD → Samsung Electronics Co Ltd NVMe SSD Controller PM9A1/PM9A3/980PRO

The OS drive is the other NVMe device; please refer to the lsblk and lspci outputs below.

[root@gds ~]# lsblk
NAME        MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
nvme1n1     259:0    0  3.5T  0 disk /mnt/gds
nvme0n1     259:1    0  3.5T  0 disk
├─nvme0n1p1 259:2    0  600M  0 part /boot/efi
├─nvme0n1p2 259:3    0    1G  0 part /boot
├─nvme0n1p3 259:4    0  100G  0 part /home
├─nvme0n1p4 259:5    0    4G  0 part [SWAP]
└─nvme0n1p5 259:6    0  3.4T  0 part /
[root@gds ~]# lspci -tvvv
-+-[0000:ff]-+-00.0  Intel Corporation Device 344c
 |           +-00.1  Intel Corporation Device 344c
 |           +-00.2  Intel Corporation Device 344c
 |           +-00.3  Intel Corporation Device 344c
 |           +-00.4  Intel Corporation Device 344c
 |           +-00.5  Intel Corporation Device 344c
 |           +-00.6  Intel Corporation Device 344c
 |           +-00.7  Intel Corporation Device 344c
 |           +-01.0  Intel Corporation Device 344c
 |           +-01.1  Intel Corporation Device 344c
 |           +-01.2  Intel Corporation Device 344c
 |           +-01.3  Intel Corporation Device 344c
 |           +-01.4  Intel Corporation Device 344c
 |           +-01.5  Intel Corporation Device 344c
 |           +-01.6  Intel Corporation Device 344c
 |           +-01.7  Intel Corporation Device 344c
 |           +-02.0  Intel Corporation Device 344c
 |           +-02.1  Intel Corporation Device 344c
 |           +-02.2  Intel Corporation Device 344c
 |           +-02.3  Intel Corporation Device 344c
 |           +-02.4  Intel Corporation Device 344c
 |           +-02.5  Intel Corporation Device 344c
 |           +-02.6  Intel Corporation Device 344c
 |           +-02.7  Intel Corporation Device 344c
 |           +-03.0  Intel Corporation Device 344c
 |           +-03.1  Intel Corporation Device 344c
 |           +-03.2  Intel Corporation Device 344c
 |           +-03.3  Intel Corporation Device 344c
 |           +-03.4  Intel Corporation Device 344c
 |           +-03.5  Intel Corporation Device 344c
 |           +-03.6  Intel Corporation Device 344c
 |           +-03.7  Intel Corporation Device 344c
 |           +-04.0  Intel Corporation Device 344c
 |           +-04.1  Intel Corporation Device 344c
 |           +-04.2  Intel Corporation Device 344c
 |           +-04.3  Intel Corporation Device 344c
 |           +-04.4  Intel Corporation Device 344c
 |           +-04.5  Intel Corporation Device 344c
 |           +-04.6  Intel Corporation Device 344c
 |           +-04.7  Intel Corporation Device 344c
 |           +-0a.0  Intel Corporation Device 344d
 |           +-0a.1  Intel Corporation Device 344d
 |           +-0a.2  Intel Corporation Device 344d
 |           +-0a.3  Intel Corporation Device 344d
 |           +-0a.4  Intel Corporation Device 344d
 |           +-0a.5  Intel Corporation Device 344d
 |           +-0a.6  Intel Corporation Device 344d
 |           +-0a.7  Intel Corporation Device 344d
 |           +-0b.0  Intel Corporation Device 344d
 |           +-0b.1  Intel Corporation Device 344d
 |           +-0b.2  Intel Corporation Device 344d
 |           +-0b.3  Intel Corporation Device 344d
 |           +-0b.4  Intel Corporation Device 344d
 |           +-0b.5  Intel Corporation Device 344d
 |           +-0b.6  Intel Corporation Device 344d
 |           +-0b.7  Intel Corporation Device 344d
 |           +-0c.0  Intel Corporation Device 344d
 |           +-0c.1  Intel Corporation Device 344d
 |           +-0c.2  Intel Corporation Device 344d
 |           +-0c.3  Intel Corporation Device 344d
 |           +-0c.4  Intel Corporation Device 344d
 |           +-0c.5  Intel Corporation Device 344d
 |           +-0c.6  Intel Corporation Device 344d
 |           +-0c.7  Intel Corporation Device 344d
 |           +-0d.0  Intel Corporation Device 344d
 |           +-0d.1  Intel Corporation Device 344d
 |           +-0d.2  Intel Corporation Device 344d
 |           +-0d.3  Intel Corporation Device 344d
 |           +-0d.4  Intel Corporation Device 344d
 |           +-0d.5  Intel Corporation Device 344d
 |           +-0d.6  Intel Corporation Device 344d
 |           +-0d.7  Intel Corporation Device 344d
 |           +-0e.0  Intel Corporation Device 344d
 |           +-0e.1  Intel Corporation Device 344d
 |           +-0e.2  Intel Corporation Device 344d
 |           +-0e.3  Intel Corporation Device 344d
 |           +-0e.4  Intel Corporation Device 344d
 |           +-0e.5  Intel Corporation Device 344d
 |           +-0e.6  Intel Corporation Device 344d
 |           +-0e.7  Intel Corporation Device 344d
 |           +-1d.0  Intel Corporation Device 344f
 |           +-1d.1  Intel Corporation Device 3457
 |           +-1e.0  Intel Corporation Device 3458
 |           +-1e.1  Intel Corporation Device 3459
 |           +-1e.2  Intel Corporation Device 345a
 |           +-1e.3  Intel Corporation Device 345b
 |           +-1e.4  Intel Corporation Device 345c
 |           +-1e.5  Intel Corporation Device 345d
 |           +-1e.6  Intel Corporation Device 345e
 |           \-1e.7  Intel Corporation Device 345f
 +-[0000:fe]-+-00.0  Intel Corporation Device 3450
 |           +-00.1  Intel Corporation Device 3451
 |           +-00.2  Intel Corporation Device 3452
 |           +-00.3  Intel Corporation Ice Lake IEH
 |           +-00.5  Intel Corporation Device 3455
 |           +-02.0  Intel Corporation Ice Lake UPI Misc
 |           +-02.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-02.2  Intel Corporation Device 3442
 |           +-03.0  Intel Corporation Ice Lake UPI Misc
 |           +-03.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-03.2  Intel Corporation Device 3442
 |           +-04.0  Intel Corporation Ice Lake UPI Misc
 |           +-04.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-04.2  Intel Corporation Device 3442
 |           +-04.3  Intel Corporation Device 3443
 |           +-05.0  Intel Corporation Device 3445
 |           +-05.1  Intel Corporation Device 3446
 |           +-05.2  Intel Corporation Device 3447
 |           +-06.0  Intel Corporation Device 3445
 |           +-06.1  Intel Corporation Device 3446
 |           +-06.2  Intel Corporation Device 3447
 |           +-07.0  Intel Corporation Device 3445
 |           +-07.1  Intel Corporation Device 3446
 |           +-07.2  Intel Corporation Device 3447
 |           +-0b.0  Intel Corporation Device 3448
 |           +-0b.1  Intel Corporation Device 3448
 |           +-0b.2  Intel Corporation Device 344b
 |           +-0c.0  Intel Corporation Device 344a
 |           +-0d.0  Intel Corporation Device 344a
 |           +-0e.0  Intel Corporation Device 344a
 |           +-0f.0  Intel Corporation Device 344a
 |           +-1a.0  Intel Corporation Device 2880
 |           +-1b.0  Intel Corporation Device 2880
 |           +-1c.0  Intel Corporation Device 2880
 |           \-1d.0  Intel Corporation Device 2880
 +-[0000:e2]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           \-02.0-[e3]----00.0  NVIDIA Corporation GA100 [A100 PCIe 80GB]
 +-[0000:c9]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           \-02.0-[ca]----00.0  NVIDIA Corporation GA100 [A100 PCIe 80GB]
 +-[0000:b0]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           \-00.4  Intel Corporation Ice Lake IEH
 +-[0000:97]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           \-02.0-[98]--+-00.0  Mellanox Technologies MT27800 Family [ConnectX-5]
 |                        \-00.1  Mellanox Technologies MT27800 Family [ConnectX-5]
 +-[0000:80]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           +-02.0  Intel Corporation Ice Lake MSM
 |           +-02.1  Intel Corporation Ice Lake PMON MSM
 |           \-02.4  Intel Corporation Ice Lake NorthPeak
 +-[0000:7f]-+-00.0  Intel Corporation Device 344c
 |           +-00.1  Intel Corporation Device 344c
 |           +-00.2  Intel Corporation Device 344c
 |           +-00.3  Intel Corporation Device 344c
 |           +-00.4  Intel Corporation Device 344c
 |           +-00.5  Intel Corporation Device 344c
 |           +-00.6  Intel Corporation Device 344c
 |           +-00.7  Intel Corporation Device 344c
 |           +-01.0  Intel Corporation Device 344c
 |           +-01.1  Intel Corporation Device 344c
 |           +-01.2  Intel Corporation Device 344c
 |           +-01.3  Intel Corporation Device 344c
 |           +-01.4  Intel Corporation Device 344c
 |           +-01.5  Intel Corporation Device 344c
 |           +-01.6  Intel Corporation Device 344c
 |           +-01.7  Intel Corporation Device 344c
 |           +-02.0  Intel Corporation Device 344c
 |           +-02.1  Intel Corporation Device 344c
 |           +-02.2  Intel Corporation Device 344c
 |           +-02.3  Intel Corporation Device 344c
 |           +-02.4  Intel Corporation Device 344c
 |           +-02.5  Intel Corporation Device 344c
 |           +-02.6  Intel Corporation Device 344c
 |           +-02.7  Intel Corporation Device 344c
 |           +-03.0  Intel Corporation Device 344c
 |           +-03.1  Intel Corporation Device 344c
 |           +-03.2  Intel Corporation Device 344c
 |           +-03.3  Intel Corporation Device 344c
 |           +-03.4  Intel Corporation Device 344c
 |           +-03.5  Intel Corporation Device 344c
 |           +-03.6  Intel Corporation Device 344c
 |           +-03.7  Intel Corporation Device 344c
 |           +-04.0  Intel Corporation Device 344c
 |           +-04.1  Intel Corporation Device 344c
 |           +-04.2  Intel Corporation Device 344c
 |           +-04.3  Intel Corporation Device 344c
 |           +-04.4  Intel Corporation Device 344c
 |           +-04.5  Intel Corporation Device 344c
 |           +-04.6  Intel Corporation Device 344c
 |           +-04.7  Intel Corporation Device 344c
 |           +-0a.0  Intel Corporation Device 344d
 |           +-0a.1  Intel Corporation Device 344d
 |           +-0a.2  Intel Corporation Device 344d
 |           +-0a.3  Intel Corporation Device 344d
 |           +-0a.4  Intel Corporation Device 344d
 |           +-0a.5  Intel Corporation Device 344d
 |           +-0a.6  Intel Corporation Device 344d
 |           +-0a.7  Intel Corporation Device 344d
 |           +-0b.0  Intel Corporation Device 344d
 |           +-0b.1  Intel Corporation Device 344d
 |           +-0b.2  Intel Corporation Device 344d
 |           +-0b.3  Intel Corporation Device 344d
 |           +-0b.4  Intel Corporation Device 344d
 |           +-0b.5  Intel Corporation Device 344d
 |           +-0b.6  Intel Corporation Device 344d
 |           +-0b.7  Intel Corporation Device 344d
 |           +-0c.0  Intel Corporation Device 344d
 |           +-0c.1  Intel Corporation Device 344d
 |           +-0c.2  Intel Corporation Device 344d
 |           +-0c.3  Intel Corporation Device 344d
 |           +-0c.4  Intel Corporation Device 344d
 |           +-0c.5  Intel Corporation Device 344d
 |           +-0c.6  Intel Corporation Device 344d
 |           +-0c.7  Intel Corporation Device 344d
 |           +-0d.0  Intel Corporation Device 344d
 |           +-0d.1  Intel Corporation Device 344d
 |           +-0d.2  Intel Corporation Device 344d
 |           +-0d.3  Intel Corporation Device 344d
 |           +-0d.4  Intel Corporation Device 344d
 |           +-0d.5  Intel Corporation Device 344d
 |           +-0d.6  Intel Corporation Device 344d
 |           +-0d.7  Intel Corporation Device 344d
 |           +-0e.0  Intel Corporation Device 344d
 |           +-0e.1  Intel Corporation Device 344d
 |           +-0e.2  Intel Corporation Device 344d
 |           +-0e.3  Intel Corporation Device 344d
 |           +-0e.4  Intel Corporation Device 344d
 |           +-0e.5  Intel Corporation Device 344d
 |           +-0e.6  Intel Corporation Device 344d
 |           +-0e.7  Intel Corporation Device 344d
 |           +-1d.0  Intel Corporation Device 344f
 |           +-1d.1  Intel Corporation Device 3457
 |           +-1e.0  Intel Corporation Device 3458
 |           +-1e.1  Intel Corporation Device 3459
 |           +-1e.2  Intel Corporation Device 345a
 |           +-1e.3  Intel Corporation Device 345b
 |           +-1e.4  Intel Corporation Device 345c
 |           +-1e.5  Intel Corporation Device 345d
 |           +-1e.6  Intel Corporation Device 345e
 |           \-1e.7  Intel Corporation Device 345f
 +-[0000:7e]-+-00.0  Intel Corporation Device 3450
 |           +-00.1  Intel Corporation Device 3451
 |           +-00.2  Intel Corporation Device 3452
 |           +-00.3  Intel Corporation Ice Lake IEH
 |           +-00.5  Intel Corporation Device 3455
 |           +-02.0  Intel Corporation Ice Lake UPI Misc
 |           +-02.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-02.2  Intel Corporation Device 3442
 |           +-03.0  Intel Corporation Ice Lake UPI Misc
 |           +-03.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-03.2  Intel Corporation Device 3442
 |           +-04.0  Intel Corporation Ice Lake UPI Misc
 |           +-04.1  Intel Corporation Ice Lake UPI Link/Phy0
 |           +-04.2  Intel Corporation Device 3442
 |           +-04.3  Intel Corporation Device 3443
 |           +-05.0  Intel Corporation Device 3445
 |           +-05.1  Intel Corporation Device 3446
 |           +-05.2  Intel Corporation Device 3447
 |           +-06.0  Intel Corporation Device 3445
 |           +-06.1  Intel Corporation Device 3446
 |           +-06.2  Intel Corporation Device 3447
 |           +-07.0  Intel Corporation Device 3445
 |           +-07.1  Intel Corporation Device 3446
 |           +-07.2  Intel Corporation Device 3447
 |           +-0b.0  Intel Corporation Device 3448
 |           +-0b.1  Intel Corporation Device 3448
 |           +-0b.2  Intel Corporation Device 344b
 |           +-0c.0  Intel Corporation Device 344a
 |           +-0d.0  Intel Corporation Device 344a
 |           +-0e.0  Intel Corporation Device 344a
 |           +-0f.0  Intel Corporation Device 344a
 |           +-1a.0  Intel Corporation Device 2880
 |           +-1b.0  Intel Corporation Device 2880
 |           +-1c.0  Intel Corporation Device 2880
 |           \-1d.0  Intel Corporation Device 2880
 +-[0000:64]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           \-02.0-[65]----00.0  NVIDIA Corporation GA100 [A100 PCIe 80GB]
 +-[0000:4a]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           \-00.4  Intel Corporation Ice Lake IEH
 +-[0000:30]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           +-02.0-[31]----00.0  Samsung Electronics Co Ltd NVMe SSD Controller PM174X
 |           +-03.0-[32]----00.0  Samsung Electronics Co Ltd NVMe SSD Controller PM9A1/PM9A3/980PRO
 |           \-04.0-[33]--+-00.0  Intel Corporation Ethernet Controller X710 for 10GbE SFP+
 |                        +-00.1  Intel Corporation Ethernet Controller X710 for 10GbE SFP+
 |                        +-00.2  Intel Corporation Ethernet Controller X710 for 10GbE SFP+
 |                        \-00.3  Intel Corporation Ethernet Controller X710 for 10GbE SFP+
 +-[0000:16]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
 |           +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
 |           +-00.2  Intel Corporation Ice Lake RAS
 |           +-00.4  Intel Corporation Ice Lake IEH
 |           \-02.0-[17]----00.0  NVIDIA Corporation GA100 [A100 PCIe 80GB]
 \-[0000:00]-+-00.0  Intel Corporation Ice Lake Memory Map/VT-d
             +-00.1  Intel Corporation Ice Lake Mesh 2 PCIe
             +-00.2  Intel Corporation Ice Lake RAS
             +-00.4  Intel Corporation Ice Lake IEH
             +-02.0  Intel Corporation Ice Lake MSM
             +-02.1  Intel Corporation Ice Lake PMON MSM
             +-02.4  Intel Corporation Ice Lake NorthPeak
             +-11.0  Intel Corporation C620 Series Chipset Family MROM 0
             +-11.5  Intel Corporation C620 Series Chipset Family SSATA Controller [AHCI mode]
             +-14.0  Intel Corporation C620 Series Chipset Family USB 3.0 xHCI Controller
             +-14.2  Intel Corporation C620 Series Chipset Family Thermal Subsystem
             +-16.0  Intel Corporation C620 Series Chipset Family MEI Controller #1
             +-16.1  Intel Corporation C620 Series Chipset Family MEI Controller #2
             +-16.4  Intel Corporation C620 Series Chipset Family MEI Controller #3
             +-17.0  Intel Corporation C620 Series Chipset Family SATA Controller [AHCI mode]
             +-1c.0-[01]--
             +-1c.4-[02-03]----00.0-[03]----00.0  Matrox Electronics Systems Ltd. Integrated Matrox G200eW3 Graphics Controller
             +-1c.5-[04]--+-00.0  Broadcom Inc. and subsidiaries NetXtreme BCM5720 Gigabit Ethernet PCIe
             |            \-00.1  Broadcom Inc. and subsidiaries NetXtreme BCM5720 Gigabit Ethernet PCIe
             +-1d.0-[05]----00.0  Marvell Technology Group Ltd. 88SE9230 PCIe 2.0 x2 4-port SATA 6 Gb/s RAID Controller
             +-1f.0  Intel Corporation Device a1cb
             +-1f.2  Intel Corporation C620 Series Chipset Family Power Management Controller
             +-1f.4  Intel Corporation C620 Series Chipset Family SMBus
             \-1f.5  Intel Corporation C620 Series Chipset Family SPI Controller

@kmodukuri, please check the requested outputs above. Some additional information is below.

[root@gds ~]# lsmod | grep nvme
nvme                   65536  6
nvme_core             135168  8 nvme
mlx_compat             20480  13 rdma_cm,ib_ipoib,mlxdevm,nvme,iw_cm,nvme_core,ib_umad,ib_core,rdma_ucm,ib_uverbs,mlx5_ib,ib_cm,mlx5_core
t10_pi                 16384  1 nvme_core
[root@gds ~]#
[root@gds ~]# cat /proc/kallsyms | grep -i nvfs
ffffffffc09c8000 t nvfs_get_pci_dev_mapping     [nvidia_fs]
ffffffffc09c8050 t nvfs_devnode [nvidia_fs]
ffffffffc09c8070 t nvfs_get_dma_address [nvidia_fs]
ffffffffc09cf8c2 t nvfs_get_dma_address.cold    [nvidia_fs]
ffffffffc09c81d0 t nvfs_transit_state   [nvidia_fs]
ffffffffc09cf9a4 t nvfs_transit_state.cold      [nvidia_fs]
ffffffffc09c8230 t nvfs_unpin_gpu_pages [nvidia_fs]
ffffffffc09cfaab t nvfs_unpin_gpu_pages.cold    [nvidia_fs]
ffffffffc09c8350 t nvfs_count_ops       [nvidia_fs]
000000000002e010 a nvfs_n_ops   [nvidia_fs]
ffffffffc09c8560 t nvfs_open    [nvidia_fs]
ffffffffc09e4568 b nvfs_shutdown        [nvidia_fs]
ffffffffc09cfb8b t nvfs_open.cold       [nvidia_fs]
ffffffffc09c85d0 t nvfs_close   [nvidia_fs]
ffffffffc09cfbba t nvfs_close.cold      [nvidia_fs]
ffffffffc09c8670 t nvfs_pin_gpu_pages   [nvidia_fs]
ffffffffc09c8b20 t nvfs_get_pages_free_callback [nvidia_fs]
ffffffffc09cfbdc t nvfs_pin_gpu_pages.cold      [nvidia_fs]
ffffffffc09c88c0 t nvfs_map     [nvidia_fs]
ffffffffc09cfebe t nvfs_map.cold        [nvidia_fs]
ffffffffc09e3100 d nvfs_curr_devices    [nvidia_fs]
ffffffffc09c8ac0 t nvfs_io_terminate.constprop.0        [nvidia_fs]
ffffffffc09d00fe t nvfs_io_terminate.constprop.0.cold   [nvidia_fs]
ffffffffc09d0259 t nvfs_get_pages_free_callback.cold    [nvidia_fs]
ffffffffc09d0302 t nvfs_get_p2p_dma_mapping.cold        [nvidia_fs]
ffffffffc09d03a1 t nvfs_get_dma.cold    [nvidia_fs]
ffffffffc09d041f t nvfs_io_free.cold    [nvidia_fs]
ffffffffc09c9330 t nvfs_io_complete     [nvidia_fs]
ffffffffc09d0576 t nvfs_io_complete.cold        [nvidia_fs]
ffffffffc09c9570 t nvfs_direct_io       [nvidia_fs]
ffffffffc09d0589 t nvfs_direct_io.cold  [nvidia_fs]
ffffffffc09d0792 t nvfs_free_gpu_info.cold      [nvidia_fs]
ffffffffc09d07f9 t nvfs_io_init.cold    [nvidia_fs]
ffffffffc09d0d1c t nvfs_io_start_op.cold        [nvidia_fs]
ffffffffc09ca2b0 t nvfs_ioctl   [nvidia_fs]
ffffffffc09d10ba t nvfs_ioctl.cold      [nvidia_fs]
ffffffffc09e4570 b nvfs_class   [nvidia_fs]
ffffffffc09d29c9 t nvfs_exit    [nvidia_fs]
ffffffffc09ca740 t nvfs_blk_rq_map_sg_internal  [nvidia_fs]
ffffffffc09d1358 t nvfs_blk_rq_map_sg_internal.cold     [nvidia_fs]
ffffffffc09cab70 t nvfs_blk_rq_map_sg   [nvidia_fs]
ffffffffc09cab80 t nvfs_nvme_blk_rq_map_sg      [nvidia_fs]
ffffffffc09d1442 t nvfs_get_gpu_sglist_rdma_info.cold   [nvidia_fs]
ffffffffc09cadd0 t nvfs_dma_unmap_sg    [nvidia_fs]
ffffffffc09cae70 t nvfs_dma_map_sg_attrs_internal.constprop.0   [nvidia_fs]
ffffffffc09d163a t nvfs_dma_map_sg_attrs_internal.constprop.0.cold      [nvidia_fs]
ffffffffc09cb060 t nvfs_dma_map_sg_attrs_nvme   [nvidia_fs]
ffffffffc09cb070 t nvfs_dma_map_sg_attrs        [nvidia_fs]
ffffffffc09d1768 t nvfs_pfn_mkwrite     [nvidia_fs]
ffffffffc09d1788 t nvfs_page_mkwrite    [nvidia_fs]
ffffffffc09d17a8 t nvfs_vma_fault       [nvidia_fs]
ffffffffc09d17c8 t nvfs_vma_mremap      [nvidia_fs]
ffffffffc09d17ef t nvfs_vma_split       [nvidia_fs]
ffffffffc09d1819 t nvfs_vma_open        [nvidia_fs]
ffffffffc09cb0b0 t nvfs_mgroup_free     [nvidia_fs]
ffffffffc09d1846 t nvfs_mgroup_free.cold        [nvidia_fs]
ffffffffc09cb2f0 t nvfs_mgroup_put_internal     [nvidia_fs]
ffffffffc09d1871 t nvfs_mgroup_put_internal.cold        [nvidia_fs]
ffffffffc09cb330 t nvfs_vma_close       [nvidia_fs]
ffffffffc09d1884 t nvfs_vma_close.cold  [nvidia_fs]
ffffffffc09cb3f0 t nvfs_get_mgroup_from_vaddr_internal  [nvidia_fs]
ffffffffc09e45a0 b nvfs_io_mgroup_hash  [nvidia_fs]
ffffffffc09d1972 t nvfs_get_mgroup_from_vaddr_internal.cold     [nvidia_fs]
ffffffffc09cb580 t __nvfs_mgroup_from_page.part.0       [nvidia_fs]
ffffffffc09d1a7c t __nvfs_mgroup_from_page.part.0.cold  [nvidia_fs]
ffffffffc09cb730 t nvfs_mgroup_mmap_internal.constprop.0        [nvidia_fs]
ffffffffc09d73c0 r nvfs_mmap_ops        [nvidia_fs]
ffffffffc09d1ad0 t nvfs_mgroup_mmap_internal.constprop.0.cold   [nvidia_fs]
ffffffffc09d1c51 t nvfs_mgroup_get.cold [nvidia_fs]
ffffffffc09d1ca0 t nvfs_mgroup_put.cold [nvidia_fs]
ffffffffc09d1cb3 t nvfs_mgroup_put_dma.cold     [nvidia_fs]
ffffffffc09d1cc6 t nvfs_get_mgroup_from_vaddr.cold      [nvidia_fs]
ffffffffc09d1ce6 t nvfs_mgroup_unpin_shadow_pages.cold  [nvidia_fs]
ffffffffc09d1cf9 t nvfs_mgroup_mmap.cold        [nvidia_fs]
ffffffffc09d1d43 t nvfs_mgroup_check_and_set.cold       [nvidia_fs]
ffffffffc09d1edc t nvfs_mgroup_pin_shadow_pages.cold    [nvidia_fs]
ffffffffc09d20c2 t nvfs_mgroup_fill_mpages.cold [nvidia_fs]
ffffffffc09d210a t nvfs_mgroup_from_page_range.cold     [nvidia_fs]
ffffffffc09d2150 t nvfs_mgroup_metadata_set_dma_state.cold      [nvidia_fs]
ffffffffc09d21ec t nvfs_mgroup_from_page.cold   [nvidia_fs]
ffffffffc09d221e t nvfs_gpu_index.cold  [nvidia_fs]
ffffffffc09ccc60 t nvfs_pcie_acs_enabled        [nvidia_fs]
ffffffffc09cccf0 t __nvfs_gpu2peer_numa_distance        [nvidia_fs]
ffffffffc09d22bb t __nvfs_gpu2peer_numa_distance.cold   [nvidia_fs]
ffffffffc09ccdf0 t __nvfs_get_gpu2peer_distance [nvidia_fs]
ffffffffc09d22fa t __nvfs_get_gpu2peer_distance.cold    [nvidia_fs]
ffffffffc09ccfe0 t __nvfs_find_all_device_paths.constprop.0     [nvidia_fs]
ffffffffc09d23b6 t __nvfs_find_all_device_paths.constprop.0.cold        [nvidia_fs]
ffffffffc09d2576 t nvfs_create_gpu_hash_entry.cold      [nvidia_fs]
ffffffffc09d2587 t nvfs_create_peer_hash_entry.cold     [nvidia_fs]
ffffffffc09d2598 t nvfs_get_gpu_hash_index.cold [nvidia_fs]
ffffffffc09d25a9 t nvfs_get_peer_hash_index.cold        [nvidia_fs]
ffffffffc09d25ba t nvfs_fill_gpu2peer_distance_table_once.cold  [nvidia_fs]
ffffffffc09d25ed t nvfs_get_gpu2peer_distance.cold      [nvidia_fs]
ffffffffc09d2631 t nvfs_update_peer_usage.cold  [nvidia_fs]
ffffffffc09d2647 t nvfs_aggregate_peer_usage_by_distance.cold   [nvidia_fs]
ffffffffc09d265d t nvfs_aggregate_cross_peer_usage.cold [nvidia_fs]
ffffffffc09cdea0 t nvfs_version_show    [nvidia_fs]
ffffffffc09cdf40 t nvfs_modules_open    [nvidia_fs]
ffffffffc09cdf60 t nvfs_version_info_open       [nvidia_fs]
ffffffffc09cdf80 t nvfs_pci_distance_map_info_open      [nvidia_fs]
ffffffffc09cdfa0 t nvfs_peer_affinity_info_open [nvidia_fs]
ffffffffc09cdfc0 t nvfs_bridge_info_open        [nvidia_fs]
ffffffffc09ce000 t nvfs_bridge_show     [nvidia_fs]
ffffffffc09cdfe0 t nvfs_devices_info_open       [nvidia_fs]
ffffffffc09ce060 t nvfs_devices_show    [nvidia_fs]
ffffffffc09d7e40 r nvfs_devices_ops     [nvidia_fs]
ffffffffc09d7ea0 r nvfs_bridge_ops      [nvidia_fs]
ffffffffc09d7de0 r nvfs_peer_affinity_ops       [nvidia_fs]
ffffffffc09d7d80 r nvfs_pci_distance_map_ops    [nvidia_fs]
ffffffffc09d2672 t nvfs_proc_init.cold  [nvidia_fs]
ffffffffc09d2688 t nvfs_proc_cleanup.cold       [nvidia_fs]
ffffffffc09ce3c0 t nvfs_stats_open      [nvidia_fs]
ffffffffc09ce650 t nvfs_stats_show      [nvidia_fs]
ffffffffc09ce3e0 t nvfs_print_gpuinfo   [nvidia_fs]
ffffffffc0a08ba0 b nvfs_gpu_stat_hash   [nvidia_fs]
ffffffffc09cea40 t nvfs_stats_clear     [nvidia_fs]
ffffffffc09d26d1 t nvfs_update_alloc_gpustat.cold       [nvidia_fs]
ffffffffc09d26f5 t nvfs_set_rdma_reg_info_to_mgroup.cold        [nvidia_fs]
ffffffffc09d27b2 t nvfs_get_rdma_reg_info_from_mgroup.cold      [nvidia_fs]
ffffffffc09d285e t nvfs_clear_rdma_reg_info_in_mgroup.cold      [nvidia_fs]
ffffffffc09d28a0 t nvfs_io_batch_init.cold      [nvidia_fs]
ffffffffc09d2960 t nvfs_io_batch_submit.cold    [nvidia_fs]
ffffffffc09cbc40 t nvfs_mgroup_put      [nvidia_fs]
ffffffffc0a08a98 b nvfs_n_write_bytes   [nvidia_fs]
ffffffffc09ccb60 t nvfs_check_gpu_page_and_error        [nvidia_fs]
ffffffffc09cab90 t nvfs_get_gpu_sglist_rdma_info        [nvidia_fs]
ffffffffc0a089c0 b nvfs_n_pg_cache_eio  [nvidia_fs]
ffffffffc0a08a58 b nvfs_n_mmap_err      [nvidia_fs]
ffffffffc09cbd90 t nvfs_mgroup_init     [nvidia_fs]
ffffffffc0a08ad0 b nvfs_n_reads_sparse_files    [nvidia_fs]
ffffffffc0a08ae8 b nvfs_n_batches       [nvidia_fs]
ffffffffc09ce360 t nvfs_check_access    [nvidia_fs]
ffffffffc0a08af0 b nvfs_batch_submit_avg_latency        [nvidia_fs]
ffffffffc0a08a40 b nvfs_n_maps_ok       [nvidia_fs]
ffffffffc09e3440 d nvfs_sfxv_dma_rw_ops [nvidia_fs]
ffffffffc09cd730 t nvfs_get_next_acs_device     [nvidia_fs]
ffffffffc09e4540 b nvfs_peer_stats_enabled      [nvidia_fs]
ffffffffc09cec70 t nvfs_update_free_gpustat     [nvidia_fs]
ffffffffc09e3000 d nvfs_dev_fops        [nvidia_fs]
ffffffffc0a08a48 b nvfs_n_maps  [nvidia_fs]
ffffffffc0a08a00 b nvfs_n_op_batches    [nvidia_fs]
ffffffffc09cd470 t nvfs_create_gpu_hash_entry   [nvidia_fs]
ffffffffc09ce390 t nvfs_extend_sg_markers       [nvidia_fs]
ffffffffc09cbdc0 t nvfs_mgroup_check_and_set    [nvidia_fs]
ffffffffc09cc390 t nvfs_mgroup_pin_shadow_pages [nvidia_fs]
ffffffffc09cd710 t nvfs_lookup_peer_hash_index_entry    [nvidia_fs]
ffffffffc0a08a88 b nvfs_write_bytes_per_sec     [nvidia_fs]
ffffffffc09cdbf0 t nvfs_peer_distance_show      [nvidia_fs]
ffffffffc09cf2a0 t nvfs_update_write_throughput [nvidia_fs]
ffffffffc0a08a0c b nvfs_n_op_writes     [nvidia_fs]
ffffffffc09c8a10 t nvfs_get_device_count        [nvidia_fs]
ffffffffc09cee80 t nvfs_update_read_throughput  [nvidia_fs]
ffffffffc09cc9c0 t nvfs_mgroup_metadata_set_dma_state   [nvidia_fs]
ffffffffc0a08b08 b nvfs_read_latency_per_sec    [nvidia_fs]
ffffffffc09cdbb0 t nvfs_reset_peer_affinity_stats       [nvidia_fs]
ffffffffc09c94f0 t nvfs_rw_verify_area  [nvidia_fs]
ffffffffc0a08a50 b nvfs_n_munmap        [nvidia_fs]
ffffffffc09d7cc0 r nvfs_pcie_link_speed_table   [nvidia_fs]
ffffffffc0a08b10 b nvfs_read_ops_per_sec        [nvidia_fs]
ffffffffc0a08a80 b nvfs_write_ops_per_sec       [nvidia_fs]
ffffffffc0a08aa8 b nvfs_n_writes_ok     [nvidia_fs]
ffffffffc0a08b38 b nvfs_n_reads_ok      [nvidia_fs]
ffffffffc09c8cd0 t nvfs_get_p2p_dma_mapping     [nvidia_fs]
ffffffffc0a08ac0 b nvfs_n_reads_sparse_region   [nvidia_fs]
ffffffffc09cbc80 t nvfs_mgroup_put_dma  [nvidia_fs]
ffffffffc09cded0 t nvfs_modules_show    [nvidia_fs]
ffffffffc0a08a04 b nvfs_n_op_process    [nvidia_fs]
ffffffffc09cda10 t nvfs_update_peer_usage       [nvidia_fs]
ffffffffc0a08a60 b nvfs_n_mmap_ok       [nvidia_fs]
ffffffffc09cf5d0 t nvfs_clear_rdma_reg_info_in_mgroup   [nvidia_fs]
ffffffffc0a08ad8 b nvfs_n_batch_err     [nvidia_fs]
ffffffffc0a08a78 b nvfs_write_latency_per_sec   [nvidia_fs]
ffffffffc09c9190 t nvfs_io_free [nvidia_fs]
ffffffffc09e3108 d nvfs_info_enabled    [nvidia_fs]
ffffffffc09c9a00 t nvfs_io_init [nvidia_fs]
ffffffffc09cb0a0 t nvfs_blk_unregister_dma_ops  [nvidia_fs]
ffffffffc09cf530 t nvfs_get_rdma_reg_info_from_mgroup   [nvidia_fs]
ffffffffc09cc5e0 t nvfs_mgroup_fill_mpages      [nvidia_fs]
ffffffffc0a08b04 b nvfs_avg_read_latency        [nvidia_fs]
ffffffffc09cd650 t nvfs_lookup_gpu_hash_index_entry     [nvidia_fs]
ffffffffc0a08b18 b nvfs_read_bytes_per_sec      [nvidia_fs]
ffffffffc09cd770 t nvfs_fill_gpu2peer_distance_table_once       [nvidia_fs]
ffffffffc0a089c8 b nvfs_n_pg_cache      [nvidia_fs]
ffffffffc09ce090 t nvfs_proc_init       [nvidia_fs]
ffffffffc0a08a18 b nvfs_n_active_shadow_buf_sz  [nvidia_fs]
ffffffffc09cddb0 t nvfs_peer_affinity_show      [nvidia_fs]
ffffffffc0a08b28 b nvfs_n_read_bytes    [nvidia_fs]
ffffffffc09ccbf0 t nvfs_gpu_index       [nvidia_fs]
ffffffffc09cf440 t nvfs_set_rdma_reg_info_to_mgroup     [nvidia_fs]
ffffffffc0a089f0 b nvfs_n_err_sg_err    [nvidia_fs]
ffffffffc09d80e0 r nvfs_stats_fops      [nvidia_fs]
ffffffffc09ced20 t nvfs_update_alloc_gpustat    [nvidia_fs]
ffffffffc09c9100 t nvfs_io_map_sparse_data      [nvidia_fs]
ffffffffc09e3104 d nvfs_max_devices     [nvidia_fs]
ffffffffc09cc830 t nvfs_mgroup_from_page_range  [nvidia_fs]
ffffffffc09cf080 t nvfs_update_batch_latency    [nvidia_fs]
ffffffffc0a089c4 b nvfs_n_pg_cache_fail [nvidia_fs]
ffffffffc0a08a20 b nvfs_n_delayed_frees [nvidia_fs]
ffffffffc09cca90 t nvfs_mgroup_from_page        [nvidia_fs]
ffffffffc09cef70 t nvfs_update_read_latency     [nvidia_fs]
ffffffffc09ce1c0 t nvfs_proc_cleanup    [nvidia_fs]
ffffffffc09e3120 d nvfs_module_mutex    [nvidia_fs]
ffffffffc09ccb10 t nvfs_is_gpu_page     [nvidia_fs]
ffffffffc09c8ef0 t nvfs_get_dma [nvidia_fs]
ffffffffc09cf190 t nvfs_update_write_latency    [nvidia_fs]
ffffffffc0a08a30 b nvfs_n_free  [nvidia_fs]
ffffffffc09cbb90 t nvfs_mgroup_put_ref  [nvidia_fs]
ffffffffc09cc7f0 t nvfs_mgroup_get_gpu_physical_address [nvidia_fs]
ffffffffc09cbb80 t nvfs_mgroup_get_ref  [nvidia_fs]
ffffffffc0a08a68 b nvfs_n_mmap  [nvidia_fs]
ffffffffc09c9e60 t nvfs_io_start_op     [nvidia_fs]
ffffffffc0a089ec b nvfs_n_err_dma_map   [nvidia_fs]
ffffffffc09d7f00 r nvfs_version_ops     [nvidia_fs]
ffffffffc09cf650 t nvfs_io_batch_init   [nvidia_fs]
ffffffffc09e34c0 d nvfs_dev_dma_rw_ops  [nvidia_fs]
ffffffffc0a08a10 b nvfs_n_op_reads      [nvidia_fs]
ffffffffc0a08b34 b nvfs_n_read_err      [nvidia_fs]
ffffffffc0a08b40 b nvfs_n_reads [nvidia_fs]
ffffffffc09e4544 b nvfs_rw_stats_enabled        [nvidia_fs]
ffffffffc09e3480 d nvfs_nvme_dma_rw_ops [nvidia_fs]
ffffffffc09cbcc0 t nvfs_get_mgroup_from_vaddr   [nvidia_fs]
ffffffffc09cd510 t nvfs_create_peer_hash_entry  [nvidia_fs]
ffffffffc09e33c0 d nvfs_ibm_scale_rdma_ops      [nvidia_fs]
ffffffffc09cf3c0 t nvfs_stat_destroy    [nvidia_fs]
ffffffffc09cc7b0 t nvfs_mgroup_get_gpu_index_and_off    [nvidia_fs]
ffffffffc09cd5b0 t nvfs_get_gpu_hash_index      [nvidia_fs]
ffffffffc0a08ab0 b nvfs_n_writes        [nvidia_fs]
ffffffffc09d7f60 r nvfs_module_ops      [nvidia_fs]
ffffffffc0a08a38 b nvfs_n_map_err       [nvidia_fs]
ffffffffc0a08a08 b nvfs_n_op_maps       [nvidia_fs]
ffffffffc0a08aa4 b nvfs_n_write_err     [nvidia_fs]
ffffffffc0a08af8 b nvfs_batch_submit_latency_per_sec    [nvidia_fs]
ffffffffc0a08b20 b nvfs_read_throughput [nvidia_fs]
ffffffffc0a08a90 b nvfs_write_throughput        [nvidia_fs]
ffffffffc0a089e8 b nvfs_n_err_dma_ref   [nvidia_fs]
ffffffffc09cd910 t nvfs_get_gpu2peer_distance   [nvidia_fs]
ffffffffc0a08aa0 b nvfs_n_write_iostate_err     [nvidia_fs]
ffffffffc0a08ac8 b nvfs_n_reads_sparse_io       [nvidia_fs]
ffffffffc0a089f4 b nvfs_n_err_mix_cpu_gpu       [nvidia_fs]
ffffffffc09d7cb0 r nvfs_pcie_link_width_table   [nvidia_fs]
ffffffffc09cbba0 t nvfs_mgroup_get      [nvidia_fs]
ffffffffc09cbcf0 t nvfs_mgroup_unpin_shadow_pages       [nvidia_fs]
ffffffffc09cbd30 t nvfs_mgroup_mmap     [nvidia_fs]
ffffffffc0a08ae0 b nvfs_n_batches_ok    [nvidia_fs]
ffffffffc09e4548 b nvfs_dbg_enabled     [nvidia_fs]
ffffffffc09cf830 t nvfs_io_batch_submit [nvidia_fs]
ffffffffc09cd670 t nvfs_get_peer_hash_index     [nvidia_fs]
ffffffffc09e3400 d nvfs_nvmesh_dma_rw_ops       [nvidia_fs]
ffffffffc09c8a20 t nvfs_io_terminate_requested  [nvidia_fs]
ffffffffc0a08ab8 b nvfs_n_reads_sparse_pages    [nvidia_fs]
ffffffffc0a08a70 b nvfs_avg_write_latency       [nvidia_fs]
ffffffffc09cf390 t nvfs_stat_init       [nvidia_fs]
ffffffffc09ccc50 t nvfs_device_priority [nvidia_fs]
ffffffffc0a08b30 b nvfs_n_read_iostate_err      [nvidia_fs]
ffffffffc09cb090 t nvfs_blk_register_dma_ops    [nvidia_fs]
ffffffffc09cdb40 t nvfs_aggregate_cross_peer_usage      [nvidia_fs]
ffffffffc0a08b00 b nvfs_batch_ops_per_sec       [nvidia_fs]
ffffffffc09c9160 t nvfs_io_unmap_sparse_data    [nvidia_fs]
ffffffffc09cdad0 t nvfs_aggregate_peer_usage_by_distance        [nvidia_fs]
ffffffffc09c98d0 t nvfs_free_gpu_info   [nvidia_fs]
ffffffffc0a08a28 b nvfs_n_callbacks     [nvidia_fs]
ffffffffc0511000 t nvme_nvfs_unmap_data [nvme]
ffffffffc0512f10 t nvme_nvfs_map_data.constprop.0       [nvme]
ffffffffc0516834 t nvme_nvfs_map_data.constprop.0.cold  [nvme]
ffffffffc0517eb2 r __kstrtab_nvme_v1_register_nvfs_dma_ops      [nvme]
ffffffffc0517ed0 r __kstrtabns_nvme_v1_register_nvfs_dma_ops    [nvme]
ffffffffc0517070 r __ksymtab_nvme_v1_register_nvfs_dma_ops      [nvme]
ffffffffc0517ed1 r __kstrtab_nvme_v1_unregister_nvfs_dma_ops    [nvme]
ffffffffc0517ef1 r __kstrtabns_nvme_v1_unregister_nvfs_dma_ops  [nvme]
ffffffffc051707c r __ksymtab_nvme_v1_unregister_nvfs_dma_ops    [nvme]
000000000002e000 a nvfs_n_ops   [nvme]
ffffffffc0516530 T nvme_v1_unregister_nvfs_dma_ops      [nvme]
ffffffffc051d798 b nvfs_ops     [nvme]
ffffffffc05164f0 T nvme_v1_register_nvfs_dma_ops        [nvme]
ffffffffc051d2d0 d nvfs_shutdown        [nvme]

[99097.712359] blk_print_req_error: 6 callbacks suppressed
[99097.712361] I/O error, dev nvme1n1, sector 33292816 op 0x1:(WRITE) flags 0x8800 phys_seg 2 prio class 2
[99097.712372] nvidia-fs:write IO failed :-5

This indicates that the I/O request failed at the NVMe layer. Could this drive be old and developing errors?
Does the same test pass consistently with -x 1 and -x 2?

Otherwise, you can check whether the disk has logged any errors:

nvme error-log /dev/nvme1n1

Also, can you share the output of the following command so we can check whether the protection bits and LBA format are set correctly?

nvme id-ns /dev/nvme1n1 -H

[root@gds ~]# nvme error-log /dev/nvme1n1
Error Log Entries for device:nvme1n1 entries:64
.................
 Entry[ 0]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 1]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 2]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 3]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 4]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 5]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 6]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 7]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 8]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[ 9]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[10]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[11]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[12]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[13]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[14]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[15]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[16]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[17]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[18]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[19]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[20]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[21]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[22]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[23]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[24]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[25]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[26]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[27]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[28]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[29]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[30]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[31]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[32]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[33]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[34]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[35]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[36]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[37]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[38]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[39]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[40]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[41]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[42]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[43]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[44]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[45]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[46]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[47]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[48]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[49]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[50]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[51]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[52]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[53]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[54]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[55]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[56]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[57]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[58]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[59]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[60]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[61]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[62]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................
 Entry[63]
.................
error_count     : 0
sqid            : 0
cmdid           : 0
status_field    : 0(Successful Completion: The command completed without error)
phase_tag       : 0
parm_err_loc    : 0
lba             : 0
nsid            : 0
vs              : 0
trtype          : The transport type is not indicated or the error is not transport related.
cs              : 0
trtype_spec_info: 0
.................