Libcuda.so recreates device symlinks ad nauseam

Any program that uses libcuda.so tries to remove and recreate the symlink /dev/char/195:X, which points to /dev/nvidiaX.

# ls -alh /dev/nvidia0 /dev/char/195:0 -d /dev/char
drwxr-xr-x 2 root root    4,7K  9. Apr 15:19 /dev/char
lrwxrwxrwx 1 root sddm      10  9. Apr 15:17 /dev/char/195:0 -> ../nvidia0
crw-rw---- 1 root video 195, 0  9. Apr 12:23 /dev/nvidia0
$ LANG=C.UTF-8 strace -e file -f __nvcc_device_query
execve("/opt/cuda-12.8.1/bin/__nvcc_device_query", ["__nvcc_device_query"], 0x7ffe4cf1dcf8 /* 115 vars */) = 0
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib/gcc/x86_64-pc-linux-gnu/15/libstdc++.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib/gcc/x86_64-pc-linux-gnu/15/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libcuda.so.1", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/proc/sys/vm/mmap_min_addr", O_RDONLY) = 3
openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY) = 3
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 3
openat(AT_FDCWD, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3
strace: Process 8872 attached
[pid  8871] openat(AT_FDCWD, "/proc/self/task/8872/comm", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 8
[pid  8871] openat(AT_FDCWD, "/dev/shm/cuda_injection_path_shm", O_RDWR|O_NOFOLLOW|O_CLOEXEC) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/home/user/.nv/nvidia-application-profile-globals-rc", O_RDONLY) = 8
[pid  8871] openat(AT_FDCWD, "/home/user/.nv/nvidia-application-profiles-rc", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/home/user/.nv/nvidia-application-profiles-rc.d", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/etc/nvidia/nvidia-application-profiles-rc", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/etc/nvidia/nvidia-application-profiles-rc.d/", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/usr/share/nvidia/nvidia-application-profiles-570.133.07-rc", O_RDONLY) = 8
[pid  8871] openat(AT_FDCWD, "/usr/share/nvidia/nvidia-application-profiles-rc", O_RDONLY) = -1 ENOENT (No such file or directory)
[pid  8871] readlink("/proc/8871/exe", "/opt/cuda-12.8.1/bin/__nvcc_devi"..., 4095) = 40
[pid  8871] openat(AT_FDCWD, "/proc/self/cmdline", O_RDONLY) = 8
[pid  8871] readlink("/proc", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] readlink("/proc/self", "8871", 1023) = 4
[pid  8871] readlink("/proc/8871", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] readlink("/proc/8871/exe", "/opt/cuda-12.8.1/bin/__nvcc_devi"..., 1023) = 40
[pid  8871] readlink("/opt", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] readlink("/opt/cuda-12.8.1", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] readlink("/opt/cuda-12.8.1/bin", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] readlink("/opt/cuda-12.8.1/bin/__nvcc_device_query", 0x7ffc528d3170, 1023) = -1 EINVAL (Invalid argument)
[pid  8871] access("/sys/module/nvidia/initstate", R_OK) = 0
[pid  8871] openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 8
[pid  8871] stat("/dev/nvidiactl", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] stat("/dev/nvidiactl", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] unlink("/dev/char/195:255") = -1 EACCES (Permission denied)
[pid  8871] symlink("../nvidiactl", "/dev/char/195:255") = -1 EEXIST (File exists)
[pid  8871] stat("/dev/char/195:255", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 8
[pid  8871] openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 9
[pid  8871] stat("/proc/driver/nvidia/gpus/0000:07:00.0/numa_status", 0x7ffc528d3680) = -1 ENOENT (No such file or directory)
[pid  8871] access("/sys/module/nvidia/initstate", R_OK) = 0
[pid  8871] openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 8
[pid  8871] stat("/dev/nvidiactl", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] stat("/dev/nvidiactl", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] unlink("/dev/char/195:255") = -1 EACCES (Permission denied)
[pid  8871] symlink("../nvidiactl", "/dev/char/195:255") = -1 EEXIST (File exists)
[pid  8871] stat("/dev/char/195:255", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0xff), ...}) = 0
[pid  8871] openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR) = 8
[pid  8871] openat(AT_FDCWD, "/sys/devices/system/memory/block_size_bytes", O_RDONLY) = 9
[pid  8871] stat("/proc/driver/nvidia/gpus/0000:07:00.0/numa_status", 0x7ffc528d3c50) = -1 ENOENT (No such file or directory)
[pid  8871] openat(AT_FDCWD, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 9
[pid  8871] openat(AT_FDCWD, "/proc/self/status", O_RDONLY) = 9
[pid  8871] openat(AT_FDCWD, "/sys/devices/system/node", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 9
[pid  8871] openat(AT_FDCWD, "/sys/devices/system/node/node0/cpumap", O_RDONLY) = 10
[pid  8871] access("/sys/module/nvidia_uvm/initstate", R_OK) = 0
[pid  8871] openat(AT_FDCWD, "/proc/devices", O_RDONLY) = 9
[pid  8871] stat("/dev/nvidia-uvm", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xf0, 0), ...}) = 0
[pid  8871] chmod("/dev/nvidia-uvm", 0666) = -1 EPERM (Operation not permitted)
[pid  8871] stat("/usr/bin/nvidia-modprobe", {st_mode=S_IFREG|0710, st_size=31072, ...}) = 0
strace: Process 8873 attached
[pid  8873] execve("/usr/bin/nvidia-modprobe", ["/usr/bin/nvidia-modprobe", "-u", "-c=0"], 0x7f713b3ec868 /* 0 vars */) = -1 EACCES (Permission denied)
[pid  8873] +++ exited with 127 +++
[pid  8872] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=8873, si_uid=1000, si_status=127, si_utime=0, si_stime=0} ---
[pid  8871] openat(AT_FDCWD, "/dev/nvidia-uvm", O_RDWR|O_CLOEXEC) = 9
[pid  8871] openat(AT_FDCWD, "/dev/nvidia-uvm", O_RDWR|O_CLOEXEC) = 10
[pid  8871] openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 11
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] unlink("/dev/char/195:0") = -1 EACCES (Permission denied)
[pid  8871] symlink("../nvidia0", "/dev/char/195:0") = -1 EEXIST (File exists)
[pid  8871] stat("/dev/char/195:0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = 11
[pid  8871] openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 12
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] unlink("/dev/char/195:0") = -1 EACCES (Permission denied)
[pid  8871] symlink("../nvidia0", "/dev/char/195:0") = -1 EEXIST (File exists)
[pid  8871] stat("/dev/char/195:0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = 12
[pid  8871] openat(AT_FDCWD, "/proc/driver/nvidia/params", O_RDONLY) = 13
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] stat("/dev/nvidia0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] unlink("/dev/char/195:0") = -1 EACCES (Permission denied)
[pid  8871] symlink("../nvidia0", "/dev/char/195:0") = -1 EEXIST (File exists)
[pid  8871] stat("/dev/char/195:0", {st_mode=S_IFCHR|0660, st_rdev=makedev(0xc3, 0), ...}) = 0
[pid  8871] openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = 13
[pid  8871] openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = 14
[pid  8871] openat(AT_FDCWD, "/dev/nvidia0", O_RDWR|O_CLOEXEC) = 15
[pid  8871] openat(AT_FDCWD, "/dev/nvidiactl", O_RDWR|O_CLOEXEC) = 15
[pid  8871] openat(AT_FDCWD, "/sys/bus/pci/devices/0000:07:00.0/numa_node", O_RDONLY) = 15
[pid  8871] mkdir("/home", 0700) = -1 EEXIST (File exists)
[pid  8871] mkdir("/home/user", 0700) = -1 EEXIST (File exists)
[pid  8871] mkdir("/home/user/.nv", 0700) = -1 EEXIST (File exists)
[pid  8871] mkdir("/home/user/.nv/ComputeCache", 0700) = -1 EEXIST (File exists)
[pid  8871] stat("/proc/8871/ns/pid", {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
[pid  8871] stat("/proc/8871/ns/pid", {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
[pid  8871] unlink("") = -1 ENOENT (No such file or directory)
89[pid  8872] +++ exited with 0 +++
+++ exited with 0 +++

If that is done by a user with write permissions, it will change the owner and group of the symlink to those of the user running the program, which will break permissions for users in the original group.

If that is done by a user without write permissions, it fails silently only if the symlink already points to the correct target device. But the attempt happens nevertheless and breaks attempts to sandbox write access to /dev/char. It also seems backwards. Why not check the validity first?

There is code in modprobe-utils that seems to be the cause, but it’s unclear how that ends up in libcuda.so.

I would have reported it to the nvidia-modprobe repo, but that same logic seems to be in more places.

TLDR: Don’t delete and re-create the same symlink over and over again with varying permissions.

1 Like

CC: @abchauhan @amrits @aplattner