CUDA directory inside the container doesn't contain enough libraries to import torch

docker run --hostname $(hostname) --network host --runtime nvidia --privileged --rm -itd nvcr.io/nvidia/l4t-ml:r32.7.1-py3
docker attach 74153d46c073
root@xavier-nx:/# python3
Python 3.6.9 (default, Dec  8 2021, 21:08:43) 
[GCC 8.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch  
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/local/lib/python3.6/dist-packages/torch/__init__.py", line 196, in <module>
    _load_global_deps()
  File "/usr/local/lib/python3.6/dist-packages/torch/__init__.py", line 149, in _load_global_deps
    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
  File "/usr/lib/python3.6/ctypes/__init__.py", line 348, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: libcurand.so.10: cannot open shared object file: No such file or directory
>>> 

This typically happens because /usr/local/cuda-10.2/ is only partially mounted from the host into the container.
Inside the container:

root@airlab-wildfire-ordv2:/# ll /usr/local/cuda-10.2/lib64/
total 1544
drwxr-xr-x 1 root root   4096 Dec 15  2021 ./
drwxr-xr-x 1 root root   4096 Dec 15  2021 ../
-rw-r--r-- 1 root root 679636 Dec 15  2021 libcudadevrt.a
-rw-r--r-- 1 root root 888074 Dec 15  2021 libcudart_static.a
drwxr-xr-x 2 root root   4096 Dec 15  2021 stubs/

Whereas on the host:

ll /usr/local/cuda-10.2/lib64/
total 2.2G
lrwxrwxrwx 1 root root   17 Mar  1  2021 libcublasLt.so -> libcublasLt.so.10
lrwxrwxrwx 1 root root   25 Mar  1  2021 libcublasLt.so.10 -> libcublasLt.so.10.2.3.300
-rw-r--r-- 1 root root  33M Mar  1  2021 libcublasLt.so.10.2.3.300
-rw-r--r-- 1 root root  35M Mar  1  2021 libcublasLt_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libcublas.so -> libcublas.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libcublas.so.10 -> libcublas.so.10.2.3.300
-rw-r--r-- 1 root root  78M Mar  1  2021 libcublas.so.10.2.3.300
-rw-r--r-- 1 root root  93M Mar  1  2021 libcublas_static.a
-rw-r--r-- 1 root root 664K Mar  1  2021 libcudadevrt.a
lrwxrwxrwx 1 root root   17 Mar  1  2021 libcudart.so -> libcudart.so.10.2
lrwxrwxrwx 1 root root   21 Mar  1  2021 libcudart.so.10.2 -> libcudart.so.10.2.300
-rw-r--r-- 1 root root 480K Mar  1  2021 libcudart.so.10.2.300
-rw-r--r-- 1 root root 868K Mar  1  2021 libcudart_static.a
lrwxrwxrwx 1 root root   14 Mar  1  2021 libcufft.so -> libcufft.so.10
lrwxrwxrwx 1 root root   22 Mar  1  2021 libcufft.so.10 -> libcufft.so.10.1.2.300
-rw-r--r-- 1 root root 193M Mar  1  2021 libcufft.so.10.1.2.300
-rw-r--r-- 1 root root 184M Mar  1  2021 libcufft_static.a
-rw-r--r-- 1 root root 201M Mar  1  2021 libcufft_static_nocallback.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libcufftw.so -> libcufftw.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libcufftw.so.10 -> libcufftw.so.10.1.2.300
-rw-r--r-- 1 root root 492K Mar  1  2021 libcufftw.so.10.1.2.300
-rw-r--r-- 1 root root  32K Mar  1  2021 libcufftw_static.a
lrwxrwxrwx 1 root root   18 Mar  1  2021 libcuinj64.so -> libcuinj64.so.10.2
lrwxrwxrwx 1 root root   22 Mar  1  2021 libcuinj64.so.10.2 -> libcuinj64.so.10.2.300
-rw-r--r-- 1 root root 1.5M Mar  1  2021 libcuinj64.so.10.2.300
-rw-r--r-- 1 root root  33K Mar  1  2021 libculibos.a
lrwxrwxrwx 1 root root   16 Mar  1  2021 libcupti.so -> libcupti.so.10.2
lrwxrwxrwx 1 root root   20 Mar  1  2021 libcupti.so.10.2 -> libcupti.so.10.2.175
-rw-r--r-- 1 root root 4.4M Mar  1  2021 libcupti.so.10.2.175
lrwxrwxrwx 1 root root   15 Mar  1  2021 libcurand.so -> libcurand.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libcurand.so.10 -> libcurand.so.10.1.2.300
-rw-r--r-- 1 root root  60M Mar  1  2021 libcurand.so.10.1.2.300
-rw-r--r-- 1 root root  60M Mar  1  2021 libcurand_static.a
lrwxrwxrwx 1 root root   17 Mar  1  2021 libcusolver.so -> libcusolver.so.10
lrwxrwxrwx 1 root root   25 Mar  1  2021 libcusolver.so.10 -> libcusolver.so.10.3.0.300
-rw-r--r-- 1 root root 209M Mar  1  2021 libcusolver.so.10.3.0.300
-rw-r--r-- 1 root root 119M Mar  1  2021 libcusolver_static.a
lrwxrwxrwx 1 root root   17 Mar  1  2021 libcusparse.so -> libcusparse.so.10
lrwxrwxrwx 1 root root   25 Mar  1  2021 libcusparse.so.10 -> libcusparse.so.10.3.1.300
-rw-r--r-- 1 root root 135M Mar  1  2021 libcusparse.so.10.3.1.300
-rw-r--r-- 1 root root 143M Mar  1  2021 libcusparse_static.a
-rw-r--r-- 1 root root 8.0M Mar  1  2021 liblapack_static.a
-rw-r--r-- 1 root root 888K Mar  1  2021 libmetis_static.a
lrwxrwxrwx 1 root root   13 Mar  1  2021 libnppc.so -> libnppc.so.10
lrwxrwxrwx 1 root root   21 Mar  1  2021 libnppc.so.10 -> libnppc.so.10.2.1.300
-rw-r--r-- 1 root root 492K Mar  1  2021 libnppc.so.10.2.1.300
-rw-r--r-- 1 root root  27K Mar  1  2021 libnppc_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnppial.so -> libnppial.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnppial.so.10 -> libnppial.so.10.2.1.300
-rw-r--r-- 1 root root  11M Mar  1  2021 libnppial.so.10.2.1.300
-rw-r--r-- 1 root root  14M Mar  1  2021 libnppial_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnppicc.so -> libnppicc.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnppicc.so.10 -> libnppicc.so.10.2.1.300
-rw-r--r-- 1 root root 4.7M Mar  1  2021 libnppicc.so.10.2.1.300
-rw-r--r-- 1 root root 5.5M Mar  1  2021 libnppicc_static.a
lrwxrwxrwx 1 root root   16 Mar  1  2021 libnppicom.so -> libnppicom.so.10
lrwxrwxrwx 1 root root   24 Mar  1  2021 libnppicom.so.10 -> libnppicom.so.10.2.1.300
-rw-r--r-- 1 root root 1.4M Mar  1  2021 libnppicom.so.10.2.1.300
-rw-r--r-- 1 root root 1.1M Mar  1  2021 libnppicom_static.a
lrwxrwxrwx 1 root root   16 Mar  1  2021 libnppidei.so -> libnppidei.so.10
lrwxrwxrwx 1 root root   24 Mar  1  2021 libnppidei.so.10 -> libnppidei.so.10.2.1.300
-rw-r--r-- 1 root root 7.8M Mar  1  2021 libnppidei.so.10.2.1.300
-rw-r--r-- 1 root root  11M Mar  1  2021 libnppidei_static.a
lrwxrwxrwx 1 root root   14 Mar  1  2021 libnppif.so -> libnppif.so.10
lrwxrwxrwx 1 root root   22 Mar  1  2021 libnppif.so.10 -> libnppif.so.10.2.1.300
-rw-r--r-- 1 root root  52M Mar  1  2021 libnppif.so.10.2.1.300
-rw-r--r-- 1 root root  56M Mar  1  2021 libnppif_static.a
lrwxrwxrwx 1 root root   14 Mar  1  2021 libnppig.so -> libnppig.so.10
lrwxrwxrwx 1 root root   22 Mar  1  2021 libnppig.so.10 -> libnppig.so.10.2.1.300
-rw-r--r-- 1 root root  28M Mar  1  2021 libnppig.so.10.2.1.300
-rw-r--r-- 1 root root  30M Mar  1  2021 libnppig_static.a
lrwxrwxrwx 1 root root   14 Mar  1  2021 libnppim.so -> libnppim.so.10
lrwxrwxrwx 1 root root   22 Mar  1  2021 libnppim.so.10 -> libnppim.so.10.2.1.300
-rw-r--r-- 1 root root 6.9M Mar  1  2021 libnppim.so.10.2.1.300
-rw-r--r-- 1 root root 7.1M Mar  1  2021 libnppim_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnppist.so -> libnppist.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnppist.so.10 -> libnppist.so.10.2.1.300
-rw-r--r-- 1 root root  20M Mar  1  2021 libnppist.so.10.2.1.300
-rw-r--r-- 1 root root  23M Mar  1  2021 libnppist_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnppisu.so -> libnppisu.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnppisu.so.10 -> libnppisu.so.10.2.1.300
-rw-r--r-- 1 root root 476K Mar  1  2021 libnppisu.so.10.2.1.300
-rw-r--r-- 1 root root  12K Mar  1  2021 libnppisu_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnppitc.so -> libnppitc.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnppitc.so.10 -> libnppitc.so.10.2.1.300
-rw-r--r-- 1 root root 3.0M Mar  1  2021 libnppitc.so.10.2.1.300
-rw-r--r-- 1 root root 3.1M Mar  1  2021 libnppitc_static.a
lrwxrwxrwx 1 root root   13 Mar  1  2021 libnpps.so -> libnpps.so.10
lrwxrwxrwx 1 root root   21 Mar  1  2021 libnpps.so.10 -> libnpps.so.10.2.1.300
-rw-r--r-- 1 root root 9.1M Mar  1  2021 libnpps.so.10.2.1.300
-rw-r--r-- 1 root root  11M Mar  1  2021 libnpps_static.a
lrwxrwxrwx 1 root root   15 Mar  1  2021 libnvblas.so -> libnvblas.so.10
lrwxrwxrwx 1 root root   23 Mar  1  2021 libnvblas.so.10 -> libnvblas.so.10.2.3.300
-rw-r--r-- 1 root root 528K Mar  1  2021 libnvblas.so.10.2.3.300
lrwxrwxrwx 1 root root   16 Mar  1  2021 libnvgraph.so -> libnvgraph.so.10
lrwxrwxrwx 1 root root   22 Mar  1  2021 libnvgraph.so.10 -> libnvgraph.so.10.2.300
-rw-r--r-- 1 root root 158M Mar  1  2021 libnvgraph.so.10.2.300
-rw-r--r-- 1 root root 161M Mar  1  2021 libnvgraph_static.a
-rw-r--r-- 1 root root 7.1M Mar  1  2021 libnvperf_host.so
-rw-r--r-- 1 root root 1.1M Mar  1  2021 libnvperf_target.so
lrwxrwxrwx 1 root root   25 Mar  1  2021 libnvrtc-builtins.so -> libnvrtc-builtins.so.10.2
lrwxrwxrwx 1 root root   29 Mar  1  2021 libnvrtc-builtins.so.10.2 -> libnvrtc-builtins.so.10.2.300
-rw-r--r-- 1 root root 4.6M Mar  1  2021 libnvrtc-builtins.so.10.2.300
lrwxrwxrwx 1 root root   16 Mar  1  2021 libnvrtc.so -> libnvrtc.so.10.2
lrwxrwxrwx 1 root root   20 Mar  1  2021 libnvrtc.so.10.2 -> libnvrtc.so.10.2.300
-rw-r--r-- 1 root root  20M Mar  1  2021 libnvrtc.so.10.2.300
lrwxrwxrwx 1 root root   18 Mar  1  2021 libnvToolsExt.so -> libnvToolsExt.so.1
lrwxrwxrwx 1 root root   22 Mar  1  2021 libnvToolsExt.so.1 -> libnvToolsExt.so.1.0.0
-rw-r--r-- 1 root root  44K Mar  1  2021 libnvToolsExt.so.1.0.0
drwxr-xr-x 2 root root 4.0K Mar 29 19:13 stubs

As a result, I have to docker run with -v /usr/local/cuda-10.2:/usr/local/cuda-10.2:ro every time.
Please fix this ASAP; I assume nvidia-docker2 is still immature right now.

I was also trying to build Torch-TensorRT inside the Dockerfile, but it didn’t work even though I’ve correctly set /etc/docker/daemon.json:

{
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    },
    "default-runtime": "nvidia"
}

However, it didn’t work, so I have to build it in a second stage, after starting the CPU-only image with docker run -v /usr/local/cuda-10.2:/usr/local/cuda-10.2:ro.

Similar things happen to the cuDNN libraries.

I don’t want to install the bulky nvidia-cuda and nvidia-l4t-cuda or other sub-packages of nvidia-jetpack.

It would be nice if VPI and other sub-packages of nvidia-jetpack could also be automatically mounted.

@Tom_Notch have you installed the nvidia-container-csv-* packages?

$ apt-cache search nvidia-container-*
libnvidia-container-tools - NVIDIA container runtime library (command-line tools)
libnvidia-container0 - NVIDIA container runtime library
libnvidia-container1 - NVIDIA container runtime library
nvidia-container-csv-cuda - Jetpack CUDA CSV file
nvidia-container-csv-cudnn - Jetpack CUDNN CSV file
nvidia-container-csv-tensorrt - Jetpack TensorRT CSV file
nvidia-container-csv-visionworks - Jetpack VisionWorks CSV file
nvidia-container-runtime - NVIDIA container runtime
nvidia-container-toolkit - NVIDIA container runtime hook
nvidia-container - NVIDIA Container Meta Package

You should have these files that mount the various packages into container:

$ ls /etc/nvidia-container-runtime/host-files-for-container.d/
cuda.csv  cudnn.csv  l4t.csv  tensorrt.csv  visionworks.csv

Yes, I have JetPack installed on the host via SDK Manager.
And no, it didn’t work.
By “I don’t want to install the bulky nvidia-cuda and nvidia-l4t-cuda or other sub-packages of nvidia-jetpack”, I mean inside the Docker image.

❯ apt-cache search nvidia-container-*
zsh: no matches found: nvidia-container-*
❯ sudo apt list --installed | egrep -i "nvidia-container"

WARNING: apt does not have a stable CLI interface. Use with caution in scripts.

libnvidia-container-tools/stable,now 1.7.0-1 arm64 [installed]
libnvidia-container0/stable,now 0.10.0+jetpack arm64 [installed]
libnvidia-container1/stable,now 1.7.0-1 arm64 [installed]
nvidia-container/stable,now 4.6.3-b17 arm64 [installed]
nvidia-container-csv-cuda/stable,now 10.2.460-1 arm64 [installed]
nvidia-container-csv-cudnn/stable,now 8.2.1.32-1+cuda10.2 arm64 [installed]
nvidia-container-csv-tensorrt/stable,now 8.2 arm64 [installed]
nvidia-container-csv-visionworks/stable,now 1.6.0.501 arm64 [installed]
nvidia-container-runtime/stable,now 3.7.0-1 all [installed]
nvidia-container-toolkit/stable,now 1.7.0-1 arm64 [installed]
❯ ls /etc/nvidia-container-runtime/host-files-for-container.d/
cuda.csv  cudnn.csv  l4t.csv  tensorrt.csv  visionworks.csv
❯ docker run --hostname $(hostname) --network host --runtime nvidia --privileged --rm -itd nvcr.io/nvidia/l4t-ml:r32.7.1-py3
523c0a1888abc42d9ba0e43df8e14bcc500c6d60e9a052cb1d647dafd0af92e5
❯ docker attach 523c0a1888ab
root@airlab-wildfire-ordv2:/# ll /usr/local/cuda/lib64/
total 1544
drwxr-xr-x 1 root root   4096 Dec 15  2021 ./
drwxr-xr-x 1 root root   4096 Dec 15  2021 ../
-rw-r--r-- 1 root root 679636 Dec 15  2021 libcudadevrt.a
-rw-r--r-- 1 root root 888074 Dec 15  2021 libcudart_static.a
drwxr-xr-x 2 root root   4096 Dec 15  2021 stubs/
root@airlab-wildfire-ordv2:/# python3
Python 3.6.9 (default, Dec  8 2021, 21:08:43)
[GCC 8.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/local/lib/python3.6/dist-packages/torch/__init__.py", line 196, in <module>
    _load_global_deps()
  File "/usr/local/lib/python3.6/dist-packages/torch/__init__.py", line 149, in _load_global_deps
    ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
  File "/usr/lib/python3.6/ctypes/__init__.py", line 348, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: libcurand.so.10: cannot open shared object file: No such file or directory
>>>

On JetPack 4, these packages get mounted into the container from the host device and do not represent added storage in the container. Given that your environment is in an unknown state due to the various packages you have manually installed, what I would recommend is reflashing your device with SDK Manager and installing the full JetPack, at which point the container runtime should work as expected. Then once you have it working, you can remove packages you don’t want if desired.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.