nvidia-smi reports "No devices were found" for my user only (RHEL 9)

nvidia-bug-report.log.gz (1.4 MB)

I don’t have anything in .bashrc or .bash_profile. Root and other users can run the command successfully. I restarted the cuda-driver and nvidia-persistenced services. Is this a permission issue? An strace and a debug log are attached.

 strace nvidia-smi
execve("/cm/local/apps/cuda/libs/current/bin/nvidia-smi", ["nvidia-smi"], 0x7fffffffc480 /* 122 vars */) = 0
brk(NULL)                               = 0x69d000
arch_prctl(0x3001 /* ARCH_??? */, 0x7fffffffc2c0) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555516000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/glibc-hwcaps/x86-64-v3", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/glibc-hwcaps/x86-64-v2", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/tls", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/glibc-hwcaps/x86-64-v3", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/glibc-hwcaps/x86-64-v2", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/tls", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib", {st_mode=S_IFDIR|0755, st_size=8192, ...}, 0) = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v3", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v2", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm", {st_mode=S_IFDIR|0755, st_size=8192, ...}, 0) = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v3", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v2", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64", 0x7fffffffb500, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=58087, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 58087, PROT_READ, MAP_PRIVATE, 3, 0) = 0x155555507000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15960, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555502000
mmap(0x155555503000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555503000
mmap(0x155555504000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555504000
mmap(0x155555505000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555505000
mmap(0x155555506000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555506000
close(3)                                = 0
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\363\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=905848, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 892944, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555427000
mmap(0x155555436000, 458752, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xf000) = 0x155555436000
mmap(0x1555554a6000, 368640, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7f000) = 0x1555554a6000
mmap(0x155555500000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xd8000) = 0x155555500000
close(3)                                = 0
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15936, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555422000
mmap(0x155555423000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555423000
mmap(0x155555424000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555424000
mmap(0x155555425000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555425000
mmap(0x155555426000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555426000
close(3)                                = 0
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220P\4\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0y\356%$[\271\321\0350\340\225\347\356&)\252"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2389384, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2136752, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555218000
mprotect(0x155555244000, 1880064, PROT_NONE) = 0
mmap(0x155555244000, 1531904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2c000) = 0x155555244000
mmap(0x1555553ba000, 344064, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1a2000) = 0x1555553ba000
mmap(0x15555540f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1f6000) = 0x15555540f000
mmap(0x155555415000, 51888, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555415000
close(3)                                = 0
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/cuda12.1/toolkit/12.1.1/targets/x86_64-linux/lib/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=16656, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555213000
mmap(0x155555214000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555214000
mmap(0x155555215000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555215000
mmap(0x155555216000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555216000
mmap(0x155555217000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555217000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555211000
arch_prctl(ARCH_SET_FS, 0x155555212300) = 0
set_tid_address(0x1555552125d0)         = 299934
set_robust_list(0x1555552125e0, 24)     = 0
mprotect(0x15555540f000, 12288, PROT_READ) = 0
mprotect(0x155555216000, 4096, PROT_READ) = 0
mprotect(0x155555425000, 4096, PROT_READ) = 0
mprotect(0x155555500000, 4096, PROT_READ) = 0
mprotect(0x155555505000, 4096, PROT_READ) = 0
mprotect(0x681000, 86016, PROT_READ)    = 0
mprotect(0x155555552000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=RLIM64_INFINITY, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x155555507000, 58087)           = 0
getrandom("\x15\x46\xca\xb7\x72\x26\x83\x4a", 8, GRND_NONBLOCK) = 8
brk(NULL)                               = 0x69d000
brk(0x6be000)                           = 0x6be000
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 <\1\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=1806968, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16449256, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155554261000
mprotect(0x1555543c9000, 2093056, PROT_NONE) = 0
mmap(0x1555545c8000, 335872, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x167000) = 0x1555545c8000
mmap(0x15555461a000, 12545768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x15555461a000
close(3)                                = 0
mprotect(0x1555545c8000, 331776, PROT_READ) = 0
getpid()                                = 299934
openat(AT_FDCWD, "/proc/modules", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "nvidia_peermem 16384 0 - Live 0x"..., 1024) = 1024
read(3, "0000000000000\nrpcrdma 98304 4 - "..., 1024) = 1024
close(3)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(3)                                = 0
stat("/dev/nvidiactl", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0xff), ...}) = 0
openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd2, 0x48), 0x7fffffffba30) = 0
openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 4
read(4, "80000000\n", 99)               = 9
close(4)                                = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd6, 0x8), 0x7fffffffbac0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xc8, 0x900), 0x15555520f4a0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2b, 0x20), 0x7fffffffbb90) = 0
openat(AT_FDCWD, "/proc/devices", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "Character devices:\n  1 mem\n  4 /"..., 1024) = 888
close(4)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/config", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 1\nDeviceFileMod"..., 1024) = 59
close(4)                                = 0
mkdir("/dev/nvidia-caps", 0755)         = -1 EEXIST (File exists)
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/config", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 1\nDeviceFileMod"..., 1024) = 59
read(4, "", 1024)                       = 0
close(4)                                = 0
stat("/dev/nvidia-caps/nvidia-cap1", {st_mode=S_IFCHR|0400, st_rdev=makedev(0x1fa, 0x1), ...}) = 0
access("/dev/nvidia-caps/nvidia-cap1", R_OK) = -1 EACCES (Permission denied)
openat(AT_FDCWD, "/proc/devices", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "Character devices:\n  1 mem\n  4 /"..., 1024) = 888
close(4)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/monitor", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 2\nDeviceFileMod"..., 1024) = 59
close(4)                                = 0
mkdir("/dev/nvidia-caps", 0755)         = -1 EEXIST (File exists)
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/monitor", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 2\nDeviceFileMod"..., 1024) = 59
read(4, "", 1024)                       = 0
close(4)                                = 0
stat("/dev/nvidia-caps/nvidia-cap2", {st_mode=S_IFCHR|0444, st_rdev=makedev(0x1fa, 0x2), ...}) = 0
access("/dev/nvidia-caps/nvidia-cap2", R_OK) = 0
openat(AT_FDCWD, "/dev/nvidia-caps/nvidia-cap2", O_RDONLY|O_CLOEXEC) = 4
fcntl(4, F_GETFD)                       = 0x1 (flags FD_CLOEXEC)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2b, 0x28), 0x7fffffffbb00) = 0
close(4)                                = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia2", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x2), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia2", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia2", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia3", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x3), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia3", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia3", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia1", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x1), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia1", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia1", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia0", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia0", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia6", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x6), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia6", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia6", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia7", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x7), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia7", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia7", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia5", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x5), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia5", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia5", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb0f0) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia4", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x4), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia4", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia4", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffff8c80) = 0
getpid()                                = 299934
newfstatat(1, "", {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x2), ...}, AT_EMPTY_PATH) = 0
write(1, "No devices were found\n", 22No devices were found
) = 22
getpid()                                = 299934
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffaef0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x29, 0x10), 0x7fffffffb960) = 0
close(3)                                = 0
exit_group(6)                           = ?
+++ exited with 6 +++

Looks like a permission issue. In many distros, you have to be in the “video” group to have access to the NVIDIA GPU. Please post the output of:
ls -l /dev/nvid*

Thanks so much for the response. This appears to be isolated to my user; other users do not get this error. Here is the ls -l output you requested, run as my user:

$ ls -l /dev/nvid*
crw-rw-rw- 1 root root 195,   0 Nov  3 22:41 /dev/nvidia0
crw-rw-rw- 1 root root 195,   1 Nov  3 22:41 /dev/nvidia1
crw-rw-rw- 1 root root 195,   2 Nov  3 22:41 /dev/nvidia2
crw-rw-rw- 1 root root 195,   3 Nov  3 22:41 /dev/nvidia3
crw-rw-rw- 1 root root 195,   4 Nov  3 22:41 /dev/nvidia4
crw-rw-rw- 1 root root 195,   5 Nov  3 22:41 /dev/nvidia5
crw-rw-rw- 1 root root 195,   6 Nov  3 22:41 /dev/nvidia6
crw-rw-rw- 1 root root 195,   7 Nov  3 22:41 /dev/nvidia7
crw-rw-rw- 1 root root 195, 255 Nov  3 22:41 /dev/nvidiactl
crw-rw-rw- 1 root root 503,   0 Nov  3 22:41 /dev/nvidia-uvm
crw-rw-rw- 1 root root 503,   1 Nov  3 22:41 /dev/nvidia-uvm-tools

/dev/nvidia-caps:
total 0
cr-------- 1 root root 506, 1 Nov  3 22:41 nvidia-cap1
cr--r--r-- 1 root root 506, 2 Nov  3 22:41 nvidia-cap2

That’s really odd; I have no idea. The DRI nodes are owned by the “video” group, but this shouldn’t keep you from using nvidia-smi.

Can you share the output of env | sort (feel free to redact or modify any personal or organization-related information)? If you happen to have sudo privileges, also try running sudo nvidia-smi.

yeah it has to be something in my env:

 env | sort
 *)
 ;;
 ;;
 ;;
 ;;
}
}
}
}
}
BASH_FUNC_ml%%=() {  module ml "$@"
BASH_FUNC_module%%=() {  _module_raw "$@" 2>&1
BASH_FUNC__module_raw%%=() {  unset _mlshdbg;
BASH_FUNC_switchml%%=() {  typeset swfound=1;
BASH_FUNC_which%%=() {  ( alias;
 case "$-" in 
CMD_WLM_CLUSTER_NAME=ourcluster
CPATH=/cm/shared/apps/slurm/current/include
CPATH_modshare=/cm/shared/apps/slurm/current/include:1
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/547289/bus
 do
 done;
 echo "Cannot switch to Modules $swname version, command not found";
 echo "Switching to Modules $swname version";
 else
 else
 else
 else
ENABLE_LMOD=0
 esac;
 eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot "$@"
 eval `eval ${_mlre} /usr/bin/tclsh /cm/local/apps/environment-modules/4.5.3/libexec/modulecmd.tcl bash '"$@"'`;
 eval `/usr/bin/tclsh /cm/local/apps/environment-modules/4.5.3/libexec/modulecmd.tcl bash "$@"`;
 export MODULES_USE_COMPAT_VERSION;
 fi
 fi;
 fi;
 fi;
 fi;
 fi;
 fi;
 fi;
 fi;
 fi;
 fi;
 for _mlv in ${MODULES_RUN_QUARANTINE:-};
HISTCONTROL=ignoredups
HISTSIZE=1000
HOME=/myhome/home/me
HOSTNAME=mlogin001
 if [ "${_mlv}" = "${_mlv##*[!A-Za-z0-9_]}" -a "${_mlv}" = "${_mlv#[0-9]}" ]; then
 if [ "${MODULES_SILENT_SHELL_DEBUG:-0}" = '1' ]; then
 if [ "${MODULES_USE_COMPAT_VERSION:-0}" = '1' ]; then
 if [ $swfound -eq 0 ]; then
 if [ -e /cm/local/apps/environment-modules/4.5.3//libexec/modulecmd-compat ]; then
 if [ -e /cm/local/apps/environment-modules/4.5.3//libexec/modulecmd.tcl ]; then
 if [ -n "${IFS+x}" ]; then
 if [ -n "${_mlIFS+x}" ]; then
 if [ -n "${_mlre:-}" ]; then
 if [ -n "${_mlshdbg:-}" ]; then
 if [ -n "`eval 'echo ${'$_mlv'+x}'`" ]; then
 IFS=' ';
 IFS=$_mlIFS;
KRB5CCNAME=KEYRING:persistent:547289
LANG=en_US.UTF-8
LD_LIBRARY_PATH=/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
LD_LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/cm/shared/apps/slurm/current/lib64/slurm:1
LESSOPEN=||/usr/bin/lesspipe.sh %s
LIBRARY_PATH=/cm/shared/apps/slurm/current/lib64/slurm:/cm/shared/apps/slurm/current/lib64
LIBRARY_PATH_modshare=/cm/shared/apps/slurm/current/lib64:1:/cm/shared/apps/slurm/current/lib64/slurm:1
_LMFILES_=/cm/local/modulefiles/slurm/ourcluster/23.02.4
_LMFILES__modshare=/cm/local/modulefiles/slurm/ourcluster/23.02.4:1
LMOD_CACHED_LOADS=yes
LMOD_SHORT_TIME=1
LOADEDMODULES_modshare=slurm/ourcluster/23.02.4:1
LOADEDMODULES=slurm/ourcluster/23.02.4
LOGNAME=me
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.m4a=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.oga=01;36:*.opus=01;36:*.spx=01;36:*.xspf=01;36:
MAIL=/var/spool/mail/me
MANPATH=/cm/shared/apps/slurm/current/man:/cm/local/apps/environment-modules/4.5.3//share/man:/usr/local/share/man:/usr/share/man:/cm/local/apps/environment-modules/current/share/man:/cm/local/apps/environment-modules/current/share/man
MANPATH_modshare=/usr/local/share/man:1:/cm/local/apps/environment-modules/4.5.3//share/man:1:/cm/local/apps/environment-modules/current/share/man:1:/usr/share/man:1:/cm/shared/apps/slurm/current/man:1
 _mlIFS=$IFS;
 _mlre="${_mlre:-}${_mlv}='`eval 'echo ${'$_mlrv':-}'`' ";
 _mlre="${_mlre:-}${_mlv}_modquar='`eval 'echo ${'$_mlv'}'`' ";
 _mlrv="MODULES_RUNENV_${_mlv}";
 _mlshdbg=''
 _mlshdbg='v'
 _mlshdbg='vx'
 _mlshdbg='x'
 _mlstatus=$?;
MODULEPATH=/cm/local/modulefiles:/cm/shared/modulefiles
MODULES_CMD=/cm/local/apps/environment-modules/4.5.3/libexec/modulecmd.tcl
MODULESHOME=/cm/local/apps/environment-modules/4.5.3/
MODULES_SET_SHELL_STARTUP=0
 MODULES_USE_COMPAT_VERSION=1;
MOTD_SHOWN=pam
PATH=/cm/shared/apps/slurm/current/sbin:/cm/shared/apps/slurm/current/bin:/cm/local/apps/environment-modules/4.5.3//bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/usr/sbin:/cm/local/apps/environment-modules/4.5.3/bin:/burg/home/me/bin
PATH_modshare=/cm/shared/apps/slurm/current/bin:1:/cm/local/apps/environment-modules/4.5.3//bin:1:/usr/sbin:1:/usr/bin:1:/cm/local/apps/environment-modules/4.5.3/bin:1:/usr/local/sbin:1:/cm/shared/apps/slurm/current/sbin:1:/usr/local/bin:1:/sbin:1
PS1=\u@\h:\w\$ 
PWD=/burg/home/me
 return $_mlstatus
 return 1;
S_COLORS=auto
 set -$_mlshdbg;
 set +v;
 set +vx;
 set +x;
SHELL=/bin/bash
SHLVL=1
SLURM_CLUSTER_NAME=ourcluster
SLURM_CONF=/cm/shared/apps/slurm/var/etc/ourcluster/slurm.conf
SLURM_CPUS_ON_NODE=2
SLURMD_DEBUG=2
SLURM_DISTRIBUTION=cyclic
SLURMD_NODENAME=m002
SLURM_GTIDS=0
SLURM_JOB_ACCOUNT=rcs
SLURM_JOB_CPUS_PER_NODE=2
SLURM_JOB_END_TIME=1707626670
SLURM_JOB_GID=500
SLURM_JOB_GROUP=user
SLURM_JOB_ID=516635
SLURM_JOBID=516635
SLURM_JOB_NAME=bash
SLURM_JOB_NODELIST=m002
SLURM_JOB_NUM_NODES=1
SLURM_JOB_PARTITION=rcs,short
SLURM_JOB_QOS=h012
SLURM_JOB_START_TIME=1707621870
SLURM_JOB_UID=547289
SLURM_JOB_USER=me
SLURM_LAUNCH_NODE_IPADDR=10.197.29.1
SLURM_LOCALID=0
SLURM_MEM_PER_NODE=10240
SLURM_NNODES=1
SLURM_NODEID=0
SLURM_NODELIST=m002
SLURM_NPROCS=1
SLURM_NTASKS=1
SLURM_PRIO_PROCESS=0
SLURM_PROCID=0
SLURM_PTY_PORT=32913
SLURM_PTY_WIN_COL=139
SLURM_PTY_WIN_ROW=30
SLURM_SRUN_COMM_HOST=10.197.29.1
SLURM_SRUN_COMM_PORT=43323
SLURM_STEP_ID=0
SLURM_STEPID=0
SLURM_STEP_LAUNCHER_PORT=43323
SLURM_STEP_NODELIST=m002
SLURM_STEP_NUM_NODES=1
SLURM_STEP_NUM_TASKS=1
SLURM_STEP_TASKS_PER_NODE=1
SLURM_SUBMIT_DIR=/burg/home/me
SLURM_SUBMIT_HOST=mlogin001
SLURM_TASK_PID=441419
SLURM_TASKS_PER_NODE=1
SLURM_TOPOLOGY_ADDR=m002
SLURM_TOPOLOGY_ADDR_PATTERN=node
SLURM_UMASK=0022
SLURM_WORKING_CLUSTER=ourcluster:mcmd001:6817:9984:109
 source /cm/local/apps/environment-modules/4.5.3//init/bash;
SRUN_DEBUG=3
SSH_CLIENT=99.70.118.23 59137 22
SSH_CONNECTION=99.70.118.23 59137 128.59.124.102 22
SSH_TTY=/dev/pts/42
TERM=xterm-256color
TMPDIR=/tmp
 typeset swfound=0;
 typeset swfound=0;
 typeset swname='compatibility';
 typeset swname='main';
 unset IFS;
 unset _mlre _mlIFS;
 unset _mlre _mlv _mlrv _mlIFS;
 unset _mlshdbg;
 unset MODULES_USE_COMPAT_VERSION;
USER=me
_=/usr/bin/env
 *v*)
 *v*x*)
which_declare=declare -f
 *x*)
XDG_RUNTIME_DIR=/run/user/547289
XDG_SESSION_CLASS=user
XDG_SESSION_ID=150348
XDG_SESSION_TYPE=tty

as root no problem:

 nvidia-smi
Sat Feb 10 22:24:09 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA RTX A6000                On | 00000000:01:00.0 Off |                  Off |
| 49%   75C    P2              295W / 300W|  43325MiB / 49140MiB |     78%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000                On | 00000000:25:00.0 Off |                  Off |
| 43%   70C    P2              290W / 300W|  41495MiB / 49140MiB |    100%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   2  NVIDIA RTX A6000                On | 00000000:41:00.0 Off |                  Off |
| 46%   73C    P2              293W / 300W|  41535MiB / 49140MiB |     93%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   3  NVIDIA RTX A6000                On | 00000000:61:00.0 Off |                  Off |
| 39%   67C    P2              294W / 300W|  41533MiB / 49140MiB |    100%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   4  NVIDIA RTX A6000                On | 00000000:81:00.0 Off |                  Off |
| 30%   45C    P2              110W / 300W|    895MiB / 49140MiB |     20%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   5  NVIDIA RTX A6000                On | 00000000:A1:00.0 Off |                  Off |
| 39%   67C    P2              265W / 300W|  26701MiB / 49140MiB |    100%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   6  NVIDIA RTX A6000                On | 00000000:C1:00.0 Off |                  Off |
| 46%   73C    P2              292W / 300W|  41535MiB / 49140MiB |    100%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   7  NVIDIA RTX A6000                On | 00000000:E1:00.0 Off |                  Off |
| 41%   69C    P2              294W / 300W|  41495MiB / 49140MiB |     83%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+

Oh man, I do not miss Slurm… I had a feeling you were on an HPC cluster based on your original post.

Try the following:

$ LD_LIBRARY_PATH="" LD_PRELOAD="" nvidia-smi

$ LD_LIBRARY_PATH="" LD_PRELOAD="" /usr/bin/nvidia-smi
LD_LIBRARY_PATH="" LD_PRELOAD="" nvidia-smi
No devices were found

 LD_LIBRARY_PATH="" LD_PRELOAD="" /usr/bin/nvidia-smi
No devices were found

Same results.

Can you unload any and all modules you have loaded? It looks like your default user shell is bash, so could you also provide the contents (again, redact as needed) of the following files?

  • ~/.profile
  • ~/.bash_profile
  • ~/.bashrc

If there are other shell-specific files, like .zsh*, .csh*, .tcsh*, .login, etc., please share those as well.

If your user is the only one on the system that is having difficulty accessing the device files for nvidia-smi, my first assumption is there’s something in your environment that’s mucking things up.

There could be something defined at a global level in /etc/profile, /etc/bashrc, and /etc/profile.d, but you would notice other users reporting the same issue.

No one else has reported it, so I agree it’s likely in my environment vars.

cat ~/.profile
cat: /path/.profile: No such file or directory

 cat ~/.bash_profile
# .bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi

# User specific environment and startup programs

PATH=$PATH:$HOME/bin

export PATH

:~$ cat ~/.bashrc
# .bashrc

# User specific aliases and functions

alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'

export PS1='\u@\h:\w\$ '
# Source global definitions
if [ -f /etc/bashrc ]; then
        . /etc/bashrc
fi


# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
#__conda_setup="$('/path/opt/Miniforge3/23.3.1-1/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
#if [ $? -eq 0 ]; then
#    eval "$__conda_setup"
#else
#    if [ -f "/path/opt/Miniforge3/23.3.1-1/etc/profile.d/conda.sh" ]; then
#        . "/path/opt/Miniforge3/23.3.1-1/etc/profile.d/conda.sh"
#    else
#        export PATH="/path/opt/Miniforge3/23.3.1-1/bin:$PATH"
#    fi
#fi
#unset __conda_setup

#if [ -f "/path/opt/Miniforge3/23.3.1-1/etc/profile.d/mamba.sh" ]; then
#    . "/path/opt/Miniforge3/23.3.1-1/etc/profile.d/mamba.sh"
#fi
# <<< conda initialize <<<

export LMOD_SHORT_TIME=1

Notice that the conda lines are commented out.

Edit: One interesting finding is that if I sudo to root and then su - back to my acct, the command works.

Edit: only the Slurm module is loaded

What result do you get if you run the following command; does it match my output?

$ sha1sum /etc/bashrc /etc/profile /etc/profile.d/*
7743c11f20d32680eab82e12b8042792c263dd67  /etc/bashrc
d862efffc6a2f64d3a065606fe23508e711e5adc  /etc/profile
dd8e5f7156c585759b8c3d4d30b4ddf80f0911fb  /etc/profile.d/bash_completion.sh
6da63d9debe9ce564bac9e533288cc14f1d74d04  /etc/profile.d/colorgrep.csh
1e39e9c628803da5b6f5a5d9e77fbe3cfefc996a  /etc/profile.d/colorgrep.sh
efee52528666225008f8752cba1ae2440fd4ac8d  /etc/profile.d/colorls.csh
d37810d8175c6ebf08b3dd687fcb11c2e8c78087  /etc/profile.d/colorls.sh
96528a77660de1de1a2b2aae1966b6160ef1b5f6  /etc/profile.d/colorxzgrep.csh
ea95aafbca745e35c7db37efd783f9e60139009c  /etc/profile.d/colorxzgrep.sh
d3e397f3bf4e587535b1494dcbd429392795b6e2  /etc/profile.d/colorzgrep.csh
c11812ea09f3f050f3af4dc680a1e9670d7c32da  /etc/profile.d/colorzgrep.sh
13f50b8589668cb1e668ece7d5907fc3a3b88901  /etc/profile.d/csh.local
5b4effe6a06ae262b2d94b31fdddbde902c0ae53  /etc/profile.d/debuginfod.csh
d1649fd73e7d566b84ac82c6b4bc5c8c52997d4a  /etc/profile.d/debuginfod.sh
8885907baec3899ef96764a08bbe053455224b76  /etc/profile.d/gawk.csh
e69dfc5c1b719471f65c11b88325b80ae7e373e7  /etc/profile.d/gawk.sh
1acf946eb09ca7854cc0c2389b284a061a4fb0fc  /etc/profile.d/lang.csh
cda23d45cf45e4fdc5f907e460e35b502b9a4285  /etc/profile.d/lang.sh
a7519eaf44643717ca3ff5620c4057baaf1b747d  /etc/profile.d/less.csh
d7cb5121015442064baa5458bda795063ccf679a  /etc/profile.d/less.sh
4008fb3787fe37f16b9530eccfdaa4563c93b920  /etc/profile.d/sh.local
61ff86612e528816ed3c09f2a1b0284db5f17041  /etc/profile.d/which2.csh
357809d0cd7b78caf989956f84b056c25098f4d7  /etc/profile.d/which2.sh

no:

sha1sum /etc/bashrc /etc/profile /etc/profile.d/*
f6fdaa8f3d0462c507a3072fc215289317f07bcf  /etc/bashrc
d862efffc6a2f64d3a065606fe23508e711e5adc  /etc/profile
dd8e5f7156c585759b8c3d4d30b4ddf80f0911fb  /etc/profile.d/bash_completion.sh
6da63d9debe9ce564bac9e533288cc14f1d74d04  /etc/profile.d/colorgrep.csh
1e39e9c628803da5b6f5a5d9e77fbe3cfefc996a  /etc/profile.d/colorgrep.sh
efee52528666225008f8752cba1ae2440fd4ac8d  /etc/profile.d/colorls.csh
d37810d8175c6ebf08b3dd687fcb11c2e8c78087  /etc/profile.d/colorls.sh
b7dc5db7ee3681a261f90dcba9af553ab8d283d1  /etc/profile.d/colorsysstat.csh
9cc4f547bd9714d95bb5704d18041487fc1a2942  /etc/profile.d/colorsysstat.sh
96528a77660de1de1a2b2aae1966b6160ef1b5f6  /etc/profile.d/colorxzgrep.csh
ea95aafbca745e35c7db37efd783f9e60139009c  /etc/profile.d/colorxzgrep.sh
d3e397f3bf4e587535b1494dcbd429392795b6e2  /etc/profile.d/colorzgrep.csh
c11812ea09f3f050f3af4dc680a1e9670d7c32da  /etc/profile.d/colorzgrep.sh
13f50b8589668cb1e668ece7d5907fc3a3b88901  /etc/profile.d/csh.local
8885907baec3899ef96764a08bbe053455224b76  /etc/profile.d/gawk.csh
e69dfc5c1b719471f65c11b88325b80ae7e373e7  /etc/profile.d/gawk.sh
1acf946eb09ca7854cc0c2389b284a061a4fb0fc  /etc/profile.d/lang.csh
cda23d45cf45e4fdc5f907e460e35b502b9a4285  /etc/profile.d/lang.sh
a7519eaf44643717ca3ff5620c4057baaf1b747d  /etc/profile.d/less.csh
d7cb5121015442064baa5458bda795063ccf679a  /etc/profile.d/less.sh
d3c7bc83a2e140567b9352f8b4a23b2cfe97c67b  /etc/profile.d/man.csh
d3b530034a68d1571b00cbde594cb633ab7d8d7f  /etc/profile.d/man.sh
715fe8a914562069093874ee37417b3491803020  /etc/profile.d/modules.csh
5463cd3ecd683b4aac2226fd5f1e49b3789422dd  /etc/profile.d/modules.sh
fbf0e4f9c947f6c13f4c6999fc654b937464f1b8  /etc/profile.d/mpi-selector.csh
502fd98565483b2a57600bf6f8c12c3c1cecd114  /etc/profile.d/mpi-selector.sh
4008fb3787fe37f16b9530eccfdaa4563c93b920  /etc/profile.d/sh.local
65e59c822a3fd04e1790bcd25828548334296d57  /etc/profile.d/ssh.csh
67bbf0a12c39bb0f7248257a1abcbb2ebb4c478e  /etc/profile.d/ssh.sh
61ff86612e528816ed3c09f2a1b0284db5f17041  /etc/profile.d/which2.csh
5ea168fa0ce93466c3a43bc2903be795e064ce0e  /etc/profile.d/which2.sh

So compared to my stock RHEL 9 install, what stands out is that your /etc/bashrc has a different checksum than mine:

# /etc/bashrc

# System wide functions and aliases
# Environment stuff goes in /etc/profile

# It's NOT a good idea to change this file unless you know what you
# are doing. It's much better to create a custom.sh shell script in
# /etc/profile.d/ to make custom changes to your environment, as this
# will prevent the need for merging in future updates.

# Prevent doublesourcing
if [ -z "$BASHRCSOURCED" ]; then
  BASHRCSOURCED="Y"

  # are we an interactive shell?
  if [ "$PS1" ]; then
    if [ -z "$PROMPT_COMMAND" ]; then
      case $TERM in
      xterm*|vte*)
        if [ -e /etc/sysconfig/bash-prompt-xterm ]; then
            PROMPT_COMMAND=/etc/sysconfig/bash-prompt-xterm
        else
            PROMPT_COMMAND='printf "\033]0;%s@%s:%s\007" "${USER}" "${HOSTNAME%%.*}" "${PWD/#$HOME/\~}"'
        fi
        ;;
      screen*)
        if [ -e /etc/sysconfig/bash-prompt-screen ]; then
            PROMPT_COMMAND=/etc/sysconfig/bash-prompt-screen
        else
            PROMPT_COMMAND='printf "\033k%s@%s:%s\033\\" "${USER}" "${HOSTNAME%%.*}" "${PWD/#$HOME/\~}"'
        fi
        ;;
      *)
        [ -e /etc/sysconfig/bash-prompt-default ] && PROMPT_COMMAND=/etc/sysconfig/bash-prompt-default
        ;;
      esac
    fi
    # Turn on parallel history
    shopt -s histappend
    history -a
    # Turn on checkwinsize
    shopt -s checkwinsize
    [ "$PS1" = "\\s-\\v\\\$ " ] && PS1="[\u@\h \W]\\$ "
    # You might want to have e.g. tty in prompt (e.g. more virtual machines)
    # and console windows
    # If you want to do so, just add e.g.
    # if [ "$PS1" ]; then
    #   PS1="[\u@\h:\l \W]\\$ "
    # fi
    # to your custom modification shell script in /etc/profile.d/ directory
  fi

  if ! shopt -q login_shell ; then # We're not a login shell
    # Need to redefine pathmunge, it gets undefined at the end of /etc/profile
    pathmunge () {
        case ":${PATH}:" in
            *:"$1":*)
                ;;
            *)
                if [ "$2" = "after" ] ; then
                    PATH=$PATH:$1
                else
                    PATH=$1:$PATH
                fi
        esac
    }

    # Set default umask for non-login shell only if it is set to 0
    [ `umask` -eq 0 ] && umask 022

    SHELL=/bin/bash
    # Only display echos from profile.d scripts if we are no login shell
    # and interactive - otherwise just process them to set envvars
    for i in /etc/profile.d/*.sh; do
        if [ -r "$i" ]; then
            if [ "$PS1" ]; then
                . "$i"
            else
                . "$i" >/dev/null
            fi
        fi
    done

    unset i
    unset -f pathmunge
  fi

fi
# vim:ts=4:sw=4

Though I see you have either the Lmod or environment-modules shell script in the environment, so that’s likely where things are originating from (but this should still be user-centric). You can find out which one via rpm -qf /etc/profile.d/modules.sh.

Another thing you can do is test with a near-empty shell environment. I tested and can confirm this ignores everything in /etc/profile.d, /etc/bashrc, and user configurations.

# Enter non-login clean room sub-shell
$ env -i HOME=$(mktemp -d) /bin/bash --noprofile --norc

# Verify the environment is clean
bash-5.2$ set
...

# Run a system command
bash-5.2$ nvidia-smi
Mon Feb 12 23:26:45 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.29.06              Driver Version: 545.29.06    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA GeForce GTX 1070        Off | 00000000:03:00.0  On |                  N/A |
| 30%   45C    P0              30W / 151W |    488MiB /  8192MiB |      1%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                                         
+---------------------------------------------------------------------------------------+
| Processes:                                                                            |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
|    0   N/A  N/A      2502      G   /usr/bin/gnome-shell                        302MiB |
|    0   N/A  N/A      4069      G   /usr/lib64/firefox/firefox                  166MiB |
+---------------------------------------------------------------------------------------+

No difference.

env -i HOME=$(mktemp -d) /bin/bash --noprofile --norc
bash-5.1$ nvidia-smi
No devices were found
rpm -qf /etc/profile.d/modules.sh
cm-modules-init-client-9.2-76_cm9.2.noarch

The cm indicates Bright Cluster Manager.

@mroche does this strace help? Still struggling with this:

 strace nvidia-smi
execve("/usr/bin/nvidia-smi", ["nvidia-smi"], 0x7fffffffcde0 /* 107 vars */) = 0
brk(NULL)                               = 0x69d000
arch_prctl(0x3001 /* ARCH_??? */, 0x7fffffffcc50) = -1 EINVAL (Invalid argument)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v3", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/glibc-hwcaps/x86-64-v2", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/tls", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm", {st_mode=S_IFDIR|0755, st_size=8192, ...}, 0) = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v3/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v3", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v2/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/glibc-hwcaps/x86-64-v2", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/tls", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/x86_64", 0x7fffffffbe90, 0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=58187, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 58187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x155555509000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555507000
openat(AT_FDCWD, "/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15960, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555502000
mmap(0x155555503000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555503000
mmap(0x155555504000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555504000
mmap(0x155555505000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555505000
mmap(0x155555506000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555506000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\363\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=905848, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 892944, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555427000
mmap(0x155555436000, 458752, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xf000) = 0x155555436000
mmap(0x1555554a6000, 368640, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7f000) = 0x1555554a6000
mmap(0x155555500000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xd8000) = 0x155555500000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15936, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555422000
mmap(0x155555423000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555423000
mmap(0x155555424000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555424000
mmap(0x155555425000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555425000
mmap(0x155555426000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555426000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220P\4\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0y\356%$[\271\321\0350\340\225\347\356&)\252"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2389384, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2136752, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555218000
mprotect(0x155555244000, 1880064, PROT_NONE) = 0
mmap(0x155555244000, 1531904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2c000) = 0x155555244000
mmap(0x1555553ba000, 344064, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1a2000) = 0x1555553ba000
mmap(0x15555540f000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1f6000) = 0x15555540f000
mmap(0x155555415000, 51888, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555415000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=16656, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555213000
mmap(0x155555214000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555214000
mmap(0x155555215000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555215000
mmap(0x155555216000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555216000
mmap(0x155555217000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555217000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555211000
arch_prctl(ARCH_SET_FS, 0x1555552120c0) = 0
set_tid_address(0x155555212390)         = 2399822
set_robust_list(0x1555552123a0, 24)     = 0
mprotect(0x15555540f000, 12288, PROT_READ) = 0
mprotect(0x155555216000, 4096, PROT_READ) = 0
mprotect(0x155555425000, 4096, PROT_READ) = 0
mprotect(0x155555500000, 4096, PROT_READ) = 0
mprotect(0x155555505000, 4096, PROT_READ) = 0
mprotect(0x681000, 86016, PROT_READ)    = 0
mprotect(0x155555552000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=RLIM64_INFINITY, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x155555509000, 58187)           = 0
getrandom("\x3a\x00\x43\x41\x74\x49\x94\xdf", 8, GRND_NONBLOCK) = 8
brk(NULL)                               = 0x69d000
brk(0x6be000)                           = 0x6be000
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=58187, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 58187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x155555509000
close(3)                                = 0
openat(AT_FDCWD, "/cm/local/apps/cuda/libs/current/lib64/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 <\1\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=1806968, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16449256, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155554261000
mprotect(0x1555543c9000, 2093056, PROT_NONE) = 0
mmap(0x1555545c8000, 335872, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x167000) = 0x1555545c8000
mmap(0x15555461a000, 12545768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x15555461a000
close(3)                                = 0
mprotect(0x1555545c8000, 331776, PROT_READ) = 0
munmap(0x155555509000, 58187)           = 0
getpid()                                = 2399822
openat(AT_FDCWD, "/proc/modules", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "overlay 155648 2 - Live 0x000000"..., 1024) = 1024
read(3, "\nrfkill 32768 1 - Live 0x0000000"..., 1024) = 1024
close(3)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(3)                                = 0
stat("/dev/nvidiactl", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0xff), ...}) = 0
openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC)           = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd2, 0x48), 0x7fffffffc3c0) = 0
openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 4
read(4, "80000000\n", 99)               = 9
close(4)                                = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xd6, 0x8), 0x7fffffffc450) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0xc8, 0x900), 0x15555520f4a0) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2b, 0x20), 0x7fffffffc520) = 0
openat(AT_FDCWD, "/proc/devices", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "Character devices:\n  1 mem\n  4 /"..., 1024) = 888
close(4)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/config", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 1\nDeviceFileMod"..., 1024) = 59
close(4)                                = 0
mkdir("/dev/nvidia-caps", 0755)         = -1 EEXIST (File exists)
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/config", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 1\nDeviceFileMod"..., 1024) = 59
read(4, "", 1024)                       = 0
close(4)                                = 0
stat("/dev/nvidia-caps/nvidia-cap1", {st_mode=S_IFCHR|0400, st_rdev=makedev(0x1fa, 0x1), ...}) = 0
access("/dev/nvidia-caps/nvidia-cap1", R_OK) = -1 EACCES (Permission denied)
openat(AT_FDCWD, "/proc/devices", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "Character devices:\n  1 mem\n  4 /"..., 1024) = 888
close(4)                                = 0
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/monitor", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 2\nDeviceFileMod"..., 1024) = 59
close(4)                                = 0
mkdir("/dev/nvidia-caps", 0755)         = -1 EEXIST (File exists)
openat(AT_FDCWD, "/proc/driver/nvidia/capabilities/mig/monitor", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "DeviceFileMinor: 2\nDeviceFileMod"..., 1024) = 59
read(4, "", 1024)                       = 0
close(4)                                = 0
stat("/dev/nvidia-caps/nvidia-cap2", {st_mode=S_IFCHR|0444, st_rdev=makedev(0x1fa, 0x2), ...}) = 0
access("/dev/nvidia-caps/nvidia-cap2", R_OK) = 0
openat(AT_FDCWD, "/dev/nvidia-caps/nvidia-cap2", O_RDONLY|O_CLOEXEC) = 4
fcntl(4, F_GETFD)                       = 0x1 (flags FD_CLOEXEC)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2b, 0x28), 0x7fffffffc490) = 0
close(4)                                = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia2", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x2), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia2", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia2", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia3", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x3), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia3", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia3", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia1", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x1), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia1", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia1", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia0", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia0", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia6", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x6), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia6", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia6", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia7", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x7), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia7", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia7", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia5", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x5), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia5", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia5", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffba80) = 0
openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 4
newfstatat(4, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(4, "ResmanDebugLevel: 4294967295\nRmL"..., 1024) = 900
close(4)                                = 0
stat("/dev/nvidia4", {st_mode=S_IFCHR|0666, st_rdev=makedev(0xc3, 0x4), ...}) = 0
openat(AT_FDCWD, "/dev/nvidia4", O_RDWR|O_CLOEXEC) = -1 EPERM (Operation not permitted)
openat(AT_FDCWD, "/dev/nvidia4", O_RDWR) = -1 EPERM (Operation not permitted)
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffff9610) = 0
getpid()                                = 2399822
newfstatat(1, "", {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x6), ...}, AT_EMPTY_PATH) = 0
write(1, "No devices were found\n", 22No devices were found
) = 22
getpid()                                = 2399822
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x2a, 0x20), 0x7fffffffb880) = 0
ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x46, 0x29, 0x10), 0x7fffffffc2f0) = 0
close(3)                                = 0
exit_group(6)                           = ?
+++ exited with 6 +++

EDIT: just comparing vs root user:

openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=58187, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 58187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x155555509000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555507000
openat(AT_FDCWD, "/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15960, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555502000
mmap(0x155555503000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555503000
mmap(0x155555504000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555504000
mmap(0x155555505000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555505000
mmap(0x155555506000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555506000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3

As my user:

openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
newfstatat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=58187, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 58187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x155555509000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x155555507000
openat(AT_FDCWD, "/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\20\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=15960, ...}, AT_EMPTY_PATH) = 0 
mmap(NULL, 16392, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x155555502000
mmap(0x155555503000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x155555503000
mmap(0x155555504000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555504000
mmap(0x155555505000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x155555505000
mmap(0x155555506000, 8, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x155555506000
close(3)                                = 0
openat(AT_FDCWD, "/cm/shared/apps/slurm/current/lib64/slurm/libm.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)

Pretty similar.

Well, now I feel silly. This machine is under the Slurm job scheduler: in order to request the GPU via an interactive job (i.e., with srun), one needs to pass (at least) --gres=gpu:1.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.