Hi all,
For a project I'm trying to run the "segmentation_models_pytorch" library, which requires pytorch>2.5.
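(For reference, this is roughly how I checked the installed PyTorch version on the device, assuming python3 is on the PATH; the version printed obviously depends on which wheel is installed:)

python3 -c "import torch; print(torch.__version__, torch.cuda.is_available())"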
I couldn't find a recent enough wheel in the PyTorch for Jetson thread (Jetson & Embedded Systems / Announcements - NVIDIA Developer Forums).
So I then tried building it via jetson-containers, since according to Dusty from NVIDIA that should work:
dusty-nv/jetson-containers: Machine Learning Containers for NVIDIA Jetson and JetPack-L4T
jetson-containers build --name=my_pytorch_container pytorch
However, the build errors out.
The full log:
DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0
environment-variable.
Sending build context to Docker daemon 19.97kB
Step 1/5 : ARG BASE_IMAGE
Step 2/5 : FROM ${BASE_IMAGE}
---> 981912c48e9a
Step 3/5 : ENV DEBIAN_FRONTEND=noninteractive LANGUAGE=en_US:en LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
---> Using cache
---> 8e698c1ab824
Step 4/5 : RUN set -ex && apt-get update && apt-get install -y --no-install-recommends locales locales-all tzdata && locale-gen en_US $LANG && update-locale LC_ALL=$LC_ALL LANG=$LANG && locale && apt-get install -y --no-install-recommends build-essential software-properties-common apt-transport-https ca-certificates lsb-release pkg-config gnupg git git-lfs gdb wget wget2 curl nano zip unzip time sshpass ssh-client && apt-get clean && rm -rf /var/lib/apt/lists/* && gcc --version && g++ --version
---> Using cache
---> fb0aa93d1c72
Step 5/5 : COPY tarpack /usr/local/bin/
---> Using cache
---> 857cbaf28680
Successfully built 857cbaf28680
Successfully tagged my_pytorch_container:r36.4.3-cu124-build-essential
DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0
environment-variable.
Sending build context to Docker daemon 40.45kB
Step 1/4 : ARG BASE_IMAGE
Step 2/4 : FROM ${BASE_IMAGE}
---> 857cbaf28680
Step 3/4 : ARG PIP_INDEX_REPO PIP_UPLOAD_REPO PIP_UPLOAD_USER PIP_UPLOAD_PASS PIP_TRUSTED_HOSTS TAR_INDEX_URL SCP_UPLOAD_URL SCP_UPLOAD_USER SCP_UPLOAD_PASS
---> Using cache
---> 4c11788fc1bc
Step 4/4 : ENV TAR_INDEX_URL=${TAR_INDEX_URL} PIP_INDEX_URL=${PIP_INDEX_REPO} PIP_TRUSTED_HOST=${PIP_TRUSTED_HOSTS} TWINE_REPOSITORY_URL=${PIP_UPLOAD_REPO} TWINE_USERNAME=${PIP_UPLOAD_USER} TWINE_PASSWORD=${PIP_UPLOAD_PASS} SCP_UPLOAD_URL=${SCP_UPLOAD_URL} SCP_UPLOAD_USER=${SCP_UPLOAD_USER} SCP_UPLOAD_PASS=${SCP_UPLOAD_PASS}
---> Using cache
---> a536bc73e566
Successfully built a536bc73e566
Successfully tagged my_pytorch_container:r36.4.3-cu124-pip_cache_cu124
DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0
environment-variable.
Sending build context to Docker daemon 40.45kB
Step 1/9 : ARG BASE_IMAGE
Step 2/9 : FROM ${BASE_IMAGE}
---> a536bc73e566
Step 3/9 : ARG CUDA_URL CUDA_DEB CUDA_PACKAGES CUDA_ARCH_LIST DISTRO="ubuntu2004"
---> Using cache
---> b96c8ba879fb
Step 4/9 : COPY install.sh /tmp/install_cuda.sh
---> Using cache
---> 09ab437cfdcb
Step 5/9 : RUN /tmp/install_cuda.sh
---> Using cache
---> da3f76ef4b6f
Step 6/9 : ENV CUDA_HOME="/usr/local/cuda"
---> Using cache
---> 8bad0cecda61
Step 7/9 : ENV NVCC_PATH="$CUDA_HOME/bin/nvcc"
---> Using cache
---> b374566a2c41
Step 8/9 : ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all CUDAARCHS=${CUDA_ARCH_LIST} CUDA_ARCHITECTURES=${CUDA_ARCH_LIST} CUDA_HOME="/usr/local/cuda" CUDNN_LIB_PATH="/usr/lib/aarch64-linux-gnu" CUDNN_LIB_INCLUDE_PATH="/usr/include" CMAKE_CUDA_COMPILER=${NVCC_PATH} CUDA_NVCC_EXECUTABLE=${NVCC_PATH} CUDACXX=${NVCC_PATH} TORCH_NVCC_FLAGS="-Xfatbin -compress-all" CUDA_BIN_PATH="${CUDA_HOME}/bin" CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" PATH="$CUDA_HOME/bin:${PATH}" LD_LIBRARY_PATH="${CUDA_HOME}/compat:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" DEBIAN_FRONTEND=noninteractive
---> Using cache
---> 47ed8950e010
Step 9/9 : WORKDIR /
---> Using cache
---> 21d2b7d0204d
Successfully built 21d2b7d0204d
Successfully tagged my_pytorch_container:r36.4.3-cu124-cuda_12.4
{
"cuda" : {
"name" : "CUDA SDK",
"version" : "12.4.1"
},
"cuda_cccl" : {
"name" : "CUDA C++ Core Compute Libraries",
"version" : "12.4.127"
},
"cuda_compat" : {
"name" : "CUDA Specific Libraries",
"version" : "12.4.35753180"
},
"cuda_cudart" : {
"name" : "CUDA Runtime (cudart)",
"version" : "12.4.127"
},
"cuda_cuobjdump" : {
"name" : "cuobjdump",
"version" : "12.4.127"
},
"cuda_cupti" : {
"name" : "CUPTI",
"version" : "12.4.127"
},
"cuda_cuxxfilt" : {
"name" : "CUDA cu++ filt",
"version" : "12.4.127"
},
"cuda_gdb" : {
"name" : "CUDA GDB",
"version" : "12.4.127"
},
"cuda_nvcc" : {
"name" : "CUDA NVCC",
"version" : "12.4.131"
},
"cuda_nvdisasm" : {
"name" : "CUDA nvdisasm",
"version" : "12.4.127"
},
"cuda_nvml_dev" : {
"name" : "CUDA NVML Headers",
"version" : "12.4.127"
},
"cuda_nvprune" : {
"name" : "CUDA nvprune",
"version" : "12.4.127"
},
"cuda_nvrtc" : {
"name" : "CUDA NVRTC",
"version" : "12.4.127"
},
"cuda_nvtx" : {
"name" : "CUDA NVTX",
"version" : "12.4.127"
},
"cuda_sanitizer_api" : {
"name" : "CUDA Compute Sanitizer API",
"version" : "12.4.127"
},
"libcublas" : {
"name" : "CUDA cuBLAS",
"version" : "12.4.5.8"
},
"libcudla" : {
"name" : "CUDA cuDLA",
"version" : "12.4.127"
},
"libcufft" : {
"name" : "CUDA cuFFT",
"version" : "11.2.1.3"
},
"libcufile" : {
"name" : "GPUDirect Storage (cufile)",
"version" : "1.9.1.3"
},
"libcurand" : {
"name" : "CUDA cuRAND",
"version" : "10.3.5.147"
},
"libcusolver" : {
"name" : "CUDA cuSOLVER",
"version" : "11.6.1.9"
},
"libcusparse" : {
"name" : "CUDA cuSPARSE",
"version" : "12.3.1.170"
},
"libnpp" : {
"name" : "CUDA NPP",
"version" : "12.2.5.30"
},
"libnvfatbin" : {
"name" : "Fatbin interaction library",
"version" : "12.4.127"
},
"libnvjitlink" : {
"name" : "JIT Linker Library",
"version" : "12.4.127"
},
"libnvjpeg" : {
"name" : "CUDA nvJPEG",
"version" : "12.3.1.117"
},
"nsight_compute" : {
"name" : "Nsight Compute",
"version" : "2024.1.1.4"
},
"nvidia_fs" : {
"name" : "NVIDIA file-system",
"version" : "2.19.7"
}
}
DEPRECATED: The legacy builder is deprecated and will be removed in a future release.
BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0
environment-variable.
Sending build context to Docker daemon 22.53kB
Step 1/7 : ARG BASE_IMAGE
Step 2/7 : FROM ${BASE_IMAGE}
---> 21d2b7d0204d
Step 3/7 : ARG CUDNN_URL
---> Using cache
---> 3d3095a73b91
Step 4/7 : ARG CUDNN_DEB
---> Using cache
---> 81fd77b0d4d8
Step 5/7 : ARG CUDNN_PACKAGES
---> Using cache
---> af5748cefa94
Step 6/7 : RUN echo "Downloading ${CUDNN_DEB}" && mkdir /tmp/cudnn && cd /tmp/cudnn && wget --quiet --show-progress --progress=bar:force:noscroll ${CUDNN_URL} && dpkg -i *.deb && cp /var/cudnn-local-tegra-repo-*/cudnn-local-tegra-*-keyring.gpg /usr/share/keyrings/ && apt-get update && apt-cache search cudnn && apt-get install -y --no-install-recommends ${CUDNN_PACKAGES} && rm -rf /var/lib/apt/lists/* && apt-get clean && dpkg --list | grep cudnn && dpkg -P ${CUDNN_DEB} && rm -rf /tmp/cudnn
---> Using cache
---> 281553bc8601
Step 7/7 : RUN cd /usr/src/cudnn_samples_v*/conv_sample/ && make -j$(nproc)
---> Using cache
---> 572a8ed4255f
Successfully built 572a8ed4255f
Successfully tagged my_pytorch_container:r36.4.3-cu124-cudnn
#define CUDNN_MAJOR 9
#define CUDNN_MINOR 0
#define CUDNN_VERSION (CUDNN_MAJOR * 10000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)
#define CUDNN_MAX_SM_MAJOR_NUMBER 9
#define CUDNN_MAX_SM_MINOR_NUMBER 0
#define CUDNN_MAX_DEVICE_VERSION (CUDNN_MAX_SM_MAJOR_NUMBER * 100 + CUDNN_MAX_SM_MINOR_NUMBER * 10)
Executing: conv_sample
double free or corruption (out)
test.sh: line 14: 23 Aborted (core dumped) ./conv_sample
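(Side note: this cuDNN test is where things abort: the "double free or corruption" in conv_sample is what later surfaces as exit status 134, i.e. SIGABRT. I believe the crash can be reproduced outside the build harness with something like the following, reusing the tag built above; untested guess on my part:)

sudo docker run -it --rm --runtime=nvidia my_pytorch_container:r36.4.3-cu124-cudnn \
  /bin/bash -c 'cd /usr/src/cudnn_samples_v*/conv_sample && ./conv_sample'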
Namespace(packages=['pytorch'], name='my_pytorch_container', base='', multiple=False, build_flags='', build_args='', package_dirs=[''], list_packages=False, show_packages=False, skip_packages=[''], skip_errors=False, skip_tests=[''], test_only=[''], simulate=False, push='', logs='', verbose=False, no_github_api=False)
-- L4T_VERSION=36.4.3
-- JETPACK_VERSION=5.1
-- CUDA_VERSION=12.4
-- PYTHON_VERSION=3.10
-- LSB_RELEASE=22.04 (jammy)
-- Building containers ['build-essential', 'pip_cache:cu124', 'cuda:12.4', 'cudnn', 'python', 'numpy', 'cmake', 'onnx', 'pytorch']
-- Building container my_pytorch_container:r36.4.3-cu124-build-essential
sudo DOCKER_BUILDKIT=0 docker build --network=host --tag my_pytorch_container:r36.4.3-cu124-build-essential \
--file /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/build/build-essential/Dockerfile \
--build-arg BASE_IMAGE=ubuntu:22.04 \
/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/build/build-essential \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/build/my_pytorch_container_r36.4.3-cu124-build-essential.txt; exit ${PIPESTATUS[0]}
-- Building container my_pytorch_container:r36.4.3-cu124-pip_cache_cu124
sudo DOCKER_BUILDKIT=0 docker build --network=host --tag my_pytorch_container:r36.4.3-cu124-pip_cache_cu124 \
--file /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cuda/Dockerfile.pip \
--build-arg BASE_IMAGE=my_pytorch_container:r36.4.3-cu124-build-essential \
--build-arg TAR_INDEX_URL="https://apt.jetson-ai-lab.dev:8000/jp5/cu124" \
--build-arg PIP_INDEX_REPO="https://pypi.jetson-ai-lab.dev/jp5/cu124" \
--build-arg PIP_UPLOAD_REPO="http://localhost/jp5/cu124" \
--build-arg PIP_UPLOAD_USER="jp5" \
--build-arg PIP_UPLOAD_PASS="none" \
--build-arg SCP_UPLOAD_URL="localhost:/dist/jp5/cu124" \
--build-arg SCP_UPLOAD_USER="None" \
--build-arg SCP_UPLOAD_PASS="None" \
/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cuda \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/build/my_pytorch_container_r36.4.3-cu124-pip_cache_cu124.txt; exit ${PIPESTATUS[0]}
-- Building container my_pytorch_container:r36.4.3-cu124-cuda_12.4
sudo DOCKER_BUILDKIT=0 docker build --network=host --tag my_pytorch_container:r36.4.3-cu124-cuda_12.4 \
--file /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cuda/Dockerfile \
--build-arg BASE_IMAGE=my_pytorch_container:r36.4.3-cu124-pip_cache_cu124 \
--build-arg CUDA_URL="https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda-tegra-repo-ubuntu2204-12-4-local_12.4.1-1_arm64.deb" \
--build-arg CUDA_DEB="cuda-tegra-repo-ubuntu2204-12-4-local" \
--build-arg CUDA_PACKAGES="cuda-toolkit*" \
--build-arg CUDA_ARCH_LIST="87" \
--build-arg DISTRO="ubuntu2204" \
/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cuda \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/build/my_pytorch_container_r36.4.3-cu124-cuda_12.4.txt; exit ${PIPESTATUS[0]}
-- Testing container my_pytorch_container:r36.4.3-cu124-cuda_12.4 (cuda:12.4/test.sh)
sudo docker run -t --rm --runtime=nvidia --network=host \
--volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cuda:/test \
--volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/data:/data \
--workdir /test \
my_pytorch_container:r36.4.3-cu124-cuda_12.4 \
/bin/bash -c '/bin/bash test.sh' \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/test/my_pytorch_container_r36.4.3-cu124-cuda_12.4_test.sh.txt; exit ${PIPESTATUS[0]}
-- Building container my_pytorch_container:r36.4.3-cu124-cudnn
sudo DOCKER_BUILDKIT=0 docker build --network=host --tag my_pytorch_container:r36.4.3-cu124-cudnn \
--file /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cudnn/Dockerfile \
--build-arg BASE_IMAGE=my_pytorch_container:r36.4.3-cu124-cuda_12.4 \
--build-arg CUDNN_URL="https://developer.download.nvidia.com/compute/cudnn/9.0.0/local_installers/cudnn-local-tegra-repo-ubuntu2204-9.0.0_1.0-1_arm64.deb" \
--build-arg CUDNN_DEB="cudnn-local-tegra-repo-ubuntu2204-9.0.0" \
--build-arg CUDNN_PACKAGES="libcudnn*-dev libcudnn*-samples" \
/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cudnn \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/build/my_pytorch_container_r36.4.3-cu124-cudnn.txt; exit ${PIPESTATUS[0]}
-- Testing container my_pytorch_container:r36.4.3-cu124-cudnn (cudnn:9.0/test.sh)
sudo docker run -t --rm --runtime=nvidia --network=host \
--volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cudnn:/test \
--volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/data:/data \
--workdir /test \
my_pytorch_container:r36.4.3-cu124-cudnn \
/bin/bash -c '/bin/bash test.sh' \
2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/test/my_pytorch_container_r36.4.3-cu124-cudnn_test.sh.txt; exit ${PIPESTATUS[0]}
And then the build script itself exits with this error:
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/jetson_containers/build.py", line 112, in <module>
    build_container(args.name, args.packages, args.base, args.build_flags, args.build_args, args.simulate, args.skip_tests, args.test_only, args.push, args.no_github_api, args.skip_packages)
  File "/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/jetson_containers/container.py", line 154, in build_container
    test_container(container_name, pkg, simulate)
  File "/home/bdck/PROJECTS/semantic_segmentation/jetson-containers/jetson_containers/container.py", line 327, in test_container
    status = subprocess.run(cmd.replace(_NEWLINE_, ' '), executable='/bin/bash', shell=True, check=True)
  File "/usr/lib/python3.10/subprocess.py", line 526, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command 'sudo docker run -t --rm --runtime=nvidia --network=host --volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/packages/cuda/cudnn:/test --volume /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/data:/data --workdir /test my_pytorch_container:r36.4.3-cu124-cudnn /bin/bash -c '/bin/bash test.sh' 2>&1 | tee /home/bdck/PROJECTS/semantic_segmentation/jetson-containers/logs/20250117_135724/test/my_pytorch_container_r36.4.3-cu124-cudnn_test.sh.txt; exit ${PIPESTATUS[0]}' returned non-zero exit status 134.
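From the Namespace dump above, the builder appears to accept a skip_tests option, so I'm guessing something like this would let the build continue past the failing cuDNN test (untested, and it obviously leaves the underlying conv_sample crash unexplained):

jetson-containers build --name=my_pytorch_container --skip-tests=cudnn pytorch

One more thing I noticed: the log reports JETPACK_VERSION=5.1 and pulls the pip/tar indexes from https://pypi.jetson-ai-lab.dev/jp5/cu124, even though L4T 36.4.3 should correspond to JetPack 6.x, so I'm not sure if that misdetection is related. Any pointers would be appreciated!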