Problem with CUDA Integration in a ROS Node within a Docker Container

I am building a ROS node that uses CUDA and PyTorch, and the node is built inside a Docker container. I cannot get past the line of my CMakeLists.txt "find_package(CUDA REQUIRED)", which indicates that CUDA is not installed correctly. The error and my current workflow are provided below. As a new user I can only include one embedded media item, so the code is pasted in as text.

This is the error (attached as a screenshot):

The Dockerfile first runs this install-libraries script, which installs CUDA according to this website:

# install opencv
sudo apt-get update && sudo apt-get install -y cmake g++ wget unzip software-properties-common
sudo add-apt-repository universe
sudo apt-get update

# Install CUDA
sudo wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-ubuntu2204.pin
sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda-tegra-repo-ubuntu2204-12-5-local_12.5.1-1_arm64.deb
sudo dpkg -i cuda-tegra-repo-ubuntu2204-12-5-local_12.5.1-1_arm64.deb
sudo cp /var/cuda-tegra-repo-ubuntu2204-12-5-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-5 cuda-compat-12-5
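
For what it's worth, a quick way to sanity-check the install inside the container (assuming the default /usr/local/cuda-12.5 prefix) would be:

# Hypothetical check, not part of the original script:
ls /usr/local/cuda-12.5/bin/nvcc
/usr/local/cuda-12.5/bin/nvcc --version   # should report "release 12.5"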

Afterwards, the Dockerfile sets the necessary environment variables for CUDA:

# Run install libraries script
WORKDIR /root/
COPY ./install-libraries.sh install-libraries.sh
RUN chmod +x ./install-libraries.sh
RUN ./install-libraries.sh

#ENV PATH=/usr/local/bin:$PATH
ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5
ENV CUDA_NVCC_EXECUTABLE=/usr/local/cuda-12.5/bin/nvcc
ENV CUDA_INCLUDE_DIRS=/usr/local/cuda-12.5/include
ENV CUDA_CUDART_LIBRARY=/usr/local/cuda-12.5/lib64/libcudart.so
ENV PATH=/usr/local/cuda-12.5/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.5/lib64:$LD_LIBRARY_PATH

ARG CCACHE_DIR
RUN mkdir -p $CCACHE_DIR

Below is the relevant section of my CMakeLists.txt:

cmake_minimum_required(VERSION 3.5)
project(ros2_node)

# Default to C++17
set(CMAKE_CXX_STANDARD 17)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
endif()

find_package(OpenCV REQUIRED)
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(rclcpp_action REQUIRED)
find_package(std_msgs REQUIRED)
find_package(action_msgs REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(rosidl_default_generators REQUIRED)
find_package(rosidl_typesupport_cpp REQUIRED)
find_package(std_srvs REQUIRED)
find_package(cv_bridge REQUIRED)

# Manually set Torch and CUDA paths
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda-12.5)
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
set(CUDA_NVCC_EXECUTABLE /usr/local/cuda-12.5/bin/nvcc)
set(CUDA_INCLUDE_DIRS /usr/local/cuda-12.5/include)
set(CUDA_CUDART_LIBRARY /usr/local/cuda-12.5/lib64/libcudart.so)

find_package(CUDA REQUIRED) # this is line 33

For now, the rest of the install-libraries script (the cuDNN and PyTorch parts) is not relevant, as I cannot even get CUDA correctly integrated in the CMakeLists.txt. I would like to point out that my current Dockerfile and CMakeLists.txt format works for other nodes that do not use CUDA or Torch, but do use OpenCV and all the other libraries listed.
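
As an aside, find_package(CUDA) uses CMake's FindCUDA module, which has been deprecated since CMake 3.10; with CMake 3.17+ the usual pattern is to enable CUDA as a language and use find_package(CUDAToolkit) instead. A minimal sketch of that style (target and source names are illustrative, not from my project):

cmake_minimum_required(VERSION 3.17)
project(ros2_node LANGUAGES CXX CUDA)

# Imported targets such as CUDA::cudart replace the old CUDA_* variables
find_package(CUDAToolkit REQUIRED)

add_executable(example_node src/example_node.cpp src/kernels.cu)
target_link_libraries(example_node PRIVATE CUDA::cudart)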

Hi,

Do you run the CMake file at build time or runtime?

For runtime, please make sure you have installed the nvidia-container-toolkit.
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt

And please also launch your container with --runtime nvidia.
This allows GPU access within the container.
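
For example (the image name is a placeholder):

docker run --runtime nvidia -it my_ros_image:latest

If the GPU is needed while the image is being built (for example, CMake probing CUDA during docker build), a common approach is to make the NVIDIA runtime the default in /etc/docker/daemon.json and then restart Docker, roughly like this:

{
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    },
    "default-runtime": "nvidia"
}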

Thanks.

The CMake file is being run during build time.

Hi,

We want to reproduce this issue locally to gather more info.
Could you share the Dockerfile for this issue?

Thanks.

ARG FROM_IMAGE=ros:iron
ARG OVERLAY_WS=/opt/ros/overlay_ws

# multi-stage for caching
FROM $FROM_IMAGE AS cacher

# clone overlay source
ARG OVERLAY_WS
ARG BITBUCKET_USERNAME
ARG BITBUCKET_PASSWORD

WORKDIR $OVERLAY_WS/src
#RUN echo "
#repositories: \n\
#  ros2/demos: \n\
#    type: git \n\
#    url: https://${BITBUCKET_USERNAME}:${BITBUCKET_PASSWORD}@bitbucket.org/pursuant-health/pursuant-ros-camera.git \n\
#    version: main \n\
#" > ../overlay.repos
#RUN vcs import ./ < ../overlay.repos
COPY ./ros2_node ros2_node

# copy manifests for caching
WORKDIR /opt
RUN mkdir -p /tmp/opt && \
    find ./ -name "package.xml" | \
      xargs cp --parents -t /tmp/opt && \
    find ./ -name "COLCON_IGNORE" | \
      xargs cp --parents -t /tmp/opt || true

# multi-stage for building
FROM $FROM_IMAGE AS builder

RUN apt-get update && apt-get install -y \
      ccache \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN ccache -s

RUN ln -s /usr/bin/ccache /usr/local/bin/gcc \
    && ln -s /usr/bin/ccache /usr/local/bin/g++ \
    && ln -s /usr/bin/ccache /usr/local/bin/cc \
    && ln -s /usr/bin/ccache /usr/local/bin/c++

RUN --mount=type=cache,target=/root/.ccache

# Run install libraries script
WORKDIR /root/
COPY ./install-libraries.sh install-libraries.sh
RUN chmod +x ./install-libraries.sh
RUN ./install-libraries.sh

#ENV PATH=/usr/local/bin:$PATH
ENV CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5
ENV CUDA_NVCC_EXECUTABLE=/usr/local/cuda-12.5/bin/nvcc
ENV CUDA_INCLUDE_DIRS=/usr/local/cuda-12.5/include
ENV CUDA_CUDART_LIBRARY=/usr/local/cuda-12.5/lib64/libcudart.so
ENV PATH=/usr/local/cuda-12.5/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.5/lib64:$LD_LIBRARY_PATH

ARG CCACHE_DIR
RUN mkdir -p $CCACHE_DIR

# install overlay dependencies
ARG OVERLAY_WS
WORKDIR $OVERLAY_WS
COPY --from=cacher /tmp/$OVERLAY_WS/src ./src
RUN apt-get update && . /opt/ros/$ROS_DISTRO/setup.sh && \
    rosdep install -y \
      --from-paths src/ \
      --ignore-src \
    && rm -rf /var/lib/apt/lists/*

# build overlay source
COPY --from=cacher $OVERLAY_WS/src ./src
ARG OVERLAY_MIXINS="release"
RUN . /opt/ros/$ROS_DISTRO/setup.sh && \
    colcon build \
      --mixin $OVERLAY_MIXINS

# source entrypoint setup
ENV OVERLAY_WS $OVERLAY_WS
RUN sed --in-place --expression \
      '$isource "$OVERLAY_WS/install/setup.bash"' \
      /ros_entrypoint.sh

COPY entrypoint.py entrypoint.py
RUN chmod +x entrypoint.py

# run launch file
ENTRYPOINT ["./entrypoint.py"]
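
For reference, the image is built with something along these lines (argument values are placeholders; BuildKit is required for the cache mount):

DOCKER_BUILDKIT=1 docker build \
  --build-arg BITBUCKET_USERNAME=<user> \
  --build-arg BITBUCKET_PASSWORD=<password> \
  --build-arg CCACHE_DIR=/root/.ccache \
  -t ros2_node:latest .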

Hi,

We will give it a try and provide more info to you later.

Thanks.

Any update?

Hi,

Sorry for the late update.

It looks like your base image is ros:iron.
Could you try our ROS image, which you can find at the link below?

Thanks.
