Problem compiling custom extension with CUDA dependency

Greetings,

I am using the DeepStream 6.3 Docker image nvcr.io/nvidia/deepstream:6.3-gc-triton-devel to develop custom extensions for our application. However, I am running into the following error when I compile my code:

INFO: Analyzed target //extensions/sensor:register_sensor_ext (0 packages loaded, 0 targets configured).
INFO: Found 1 target...
ERROR: /workspace/KGE/extensions/sensor/BUILD:7:19: Compiling extensions/sensor/v4l2_source.cpp failed: undeclared inclusion(s) in rule '//extensions/sensor:libgxf_sensor.so':
this rule is missing dependency declarations for the following files included by 'extensions/sensor/v4l2_source.cpp':
  '/usr/local/cuda-12.1/targets/x86_64-linux/include/nv/detail/__target_macros'
  '/usr/local/cuda-12.1/targets/x86_64-linux/include/nv/detail/__preprocessor'
Target //extensions/sensor:register_sensor_ext failed to build
Use --verbose_failures to see the command lines of failed build steps.
INFO: Elapsed time: 4.882s, Critical Path: 4.68s
INFO: 2 processes: 2 internal.
FAILED: Build did NOT complete successfully

My folder structure is as follows:

Workspace
├─ bzl
│  ├─ BUILD
│  └─ cuda.bzl
├─ extensions
│  └─ sensor
│     ├─ BUILD
│     ├─ sensor_ext.cpp
│     ├─ v4l2_source.cpp
│     └─ v4l2_source.hpp
├─ third_party
│  ├─ BUILD
│  ├─ clara_gxf_arm64.BUILD
│  ├─ clara_gxf_x86_64.BUILD
│  ├─ cuda_arm64.BUILD
│  ├─ cuda_x86_64.BUILD
│  └─ packages.bzl
├─ .bazelrc
└─ WORKSPACE

The bzl/cuda.bzl file defines the CUDA libraries and where to find them:

"""
Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.
"""
CUDA_SO = [
    "cudart",
    "cufft",
    "curand",
    "cusolver",
    "cusparse",
    "nvgraph",
    "nvrtc",
]

NPP_SO = [
    "nppc",
    "nppial",
    "nppicc",
    "nppicom",
    "nppidei",
    "nppif",
    "nppig",
    "nppim",
    "nppist",
    "nppisu",
    "nppitc",
    "npps",
]

# Get the path for the shared library with given name for the given version
def cuda_so_path(name, version):
    major_version = version.split(".")[0]
    return "usr/local/cuda-" + version + "/lib64/lib" + name + ".so*"

# Get the path for libcuda.so for the given version. A stub is used as the library is provided
# by the CUDA driver and is required to be available on the system.
def cuda_driver_so_path(family, version):
    return "usr/local/cuda-" + version + "/targets/" + family + "-linux/lib/stubs/libcuda.so"

# Get the path for libnvToolsExt.so for the given version. A stub is used as the library is provided
# by the CUDA driver and is required to be available on the system.
def cuda_nv_tools_ext_so_path(family, version):
    return "usr/local/cuda-" + version + "/targets/" + family + "-linux/lib/libnvToolsExt.so.1"

# Creates CUDA related dependencies. The arguments `family` and `version` are used to find the
# library and header files in the package
def cuda_device_deps(family, version):
    cuda_include_prefix = "usr/local/cuda-" + version + "/targets/" + family + "-linux/include"

    # CUDA
    cuda_hdrs = native.glob([
        # FIXME separate out headers
        cuda_include_prefix + "/*.h",
        cuda_include_prefix + "/*.hpp",
        cuda_include_prefix + "/CL/*.h",
        cuda_include_prefix + "/crt/*",
        cuda_include_prefix + "/cuda/std/*",
        cuda_include_prefix + "/cuda/std/**/*",
        cuda_include_prefix + "/nv/detail/*",

    ])

    # Create a stub library for the CUDA base library provided by the driver
    native.cc_library(
        name = "cuda",
        hdrs = cuda_hdrs,
        srcs = [
            cuda_driver_so_path(family, version), 
            cuda_nv_tools_ext_so_path(family, version)
        ],
        strip_include_prefix = cuda_include_prefix,
        visibility = ["//visibility:public"],
    )

    # Create one library per CUDA shared library
    for so in CUDA_SO:
        native.cc_library(
            name = so,
            hdrs = cuda_hdrs,
            srcs = native.glob([cuda_so_path(so, version)]),
            strip_include_prefix = cuda_include_prefix,
            visibility = ["//visibility:public"],
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:lib" + so + ".so," +
                "--as-needed",
            ],
        )

    # NPP
    npp_hdrs = native.glob([cuda_include_prefix + "/npp*.*"])  # FIXME separate out headers
    for so in NPP_SO:
        native.cc_library(
            name = so,
            hdrs = npp_hdrs,
            srcs = native.glob([cuda_so_path(so, version)]),
            # Dependency graph: nppc <- npps <- everything else
            deps = ["cudart"] +
                   (["nppc"] if so != "nppc" else []) +
                   (["npps"] if so != "npps" and so != "nppc" else []),
            strip_include_prefix = cuda_include_prefix,
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:lib" + so + ".so," +
                "--as-needed",
            ],
            visibility = ["//visibility:public"],
        )

    # THRUST CUDA Library.  Note: CUB template library is included into THRUST
    # library (as in CUDA 10).  With CUDA 10 support removal, it can be moved
    # into a separate library.
    native.cc_library(
        name = "thrust",
        hdrs = native.glob([cuda_include_prefix + "/thrust/**/*",
                            cuda_include_prefix + "/cub/**/*"]),
        deps = ["cudart"],
        strip_include_prefix = cuda_include_prefix,
        visibility = ["//visibility:public"],
    )

    # CUDNN and CUBLAS
    if family == 'x86_64':
        native.cc_library(
            name = "cudnn",
            hdrs = native.glob([cuda_include_prefix + "/cudnn*.h"]),
            includes = [cuda_include_prefix],
            strip_include_prefix = cuda_include_prefix,
            srcs = native.glob(["usr/local/cuda-" + version + "/lib64/libcudnn*.so*"]),
            deps = ["cudart"],
            linkstatic = True,
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:libcudnn.so.8," +
                "--as-needed",
            ],
            visibility = ["//visibility:public"],
        )
        native.cc_library(
            name = "cublas",
            hdrs = native.glob(["usr/include/*.h"]),
            srcs = native.glob(["usr/local/cuda-" + version + "/lib64/libcublas*.so*"]),
            strip_include_prefix = "usr/include",
            visibility = ["//visibility:public"],
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:libcublasLt.so,-l:libcublas.so," +
                "--as-needed",
            ],
        )
    else:
        native.cc_library(
            name = "cudnn",
            hdrs = native.glob(["usr/include/cudnn*.h"]),
            includes = [cuda_include_prefix],
            strip_include_prefix = "usr/include",
            srcs = native.glob(["usr/lib/aarch64-linux-gnu/libcudnn*.so*"]),
            deps = ["cudart"],
            linkstatic = True,
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:libcudnn.so.8," +
                "--as-needed",
            ],
            visibility = ["//visibility:public"],
        )
        native.cc_library(
            name = "cublas",
            hdrs = native.glob(["usr/include/*.h"]),
            srcs = native.glob(["usr/lib/aarch64-linux-gnu/libcublas*.so*"]),
            strip_include_prefix = "usr/include",
            visibility = ["//visibility:public"],
            linkopts = [
                "-Wl,--no-as-needed," +
                "-l:libcublasLt.so,-l:libcublas.so," +
                "--as-needed",
            ],
        )


# Selects the correct version of `target` based on the current platform
def _cuda_select(target):
    return select({
        "@com_extension_dev//build:platform_x86_64": ["@cuda_x86_64//:" + target],
#        "@@com_extension_dev//build:platform_jetson": ["@cuda_arm64//:" + target],
    })

# Creates all CUDA related dependencies for the current platform
def cuda_deps():
    TARGETS = ["cuda", "cublas"] + CUDA_SO + NPP_SO + ["cudnn", "thrust"]
    for target in TARGETS:
        native.cc_library(
            name = target,
            visibility = ["//visibility:public"],
            deps = _cuda_select(target),
        )
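
For completeness, this is how I understand the macros above are meant to be wired up. The snippets below are a sketch, not the verbatim contents of my third_party files; the load paths and the CUDA version are assumptions:

# third_party/cuda_x86_64.BUILD (sketch; actual contents may differ)
load("@//bzl:cuda.bzl", "cuda_device_deps")

# Instantiates :cuda, :cudart, the :npp* libraries, :thrust, :cudnn and :cublas
# inside the @cuda_x86_64 repository, pointing at /usr/local/cuda-12.1.
cuda_device_deps(family = "x86_64", version = "12.1")

# third_party/BUILD (sketch; actual contents may differ)
load("//bzl:cuda.bzl", "cuda_deps")

# Creates the //third_party:cuda, //third_party:cudart, ... aliases that forward
# to the matching @cuda_x86_64 targets for the current platform via _cuda_select().
cuda_deps()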

The extensions/sensor/BUILD file contains the following:


load("@com_extension_dev//build:graph_extension.bzl", "graph_cc_extension")
load("@com_extension_dev//build:registry.bzl", "register_extension")

exports_files(["LICENSE"])

graph_cc_extension(
    name = "sensor",
    srcs = [
        "v4l2_source.cpp",
        "sensor_ext.cpp",
    ],
    hdrs = [
        "v4l2_source.hpp",
    ],
    deps = [
        "@StandardExtension",
        "//third_party:gxf_multimedia",
        "//third_party:cuda"
    ],
)

register_extension(
    name = "register_sensor_ext",
    badges = [""],
    extension = "sensor",
    labels = [
        "nvidia",
        "gpu",
    ],
    license = "LICENSE",
    license_file = ":LICENSE",
    ngc_dependencies = {
        "StandardExtension": "2.3.0",
        "MultimediaExtension": "2.3.0",
    },
    priority = "1",
    git_repository = "REMOVED",
    url = "REMOVED",
    uuid = "REMOVED",
    version = "1.0.0",
    visibility = ["//visibility:public"],
)

The two files required by my code exist in my container, and I also tried adding them directly to my CUDA headers definition, but that did not help; the extra glob patterns I tried are sketched below. Does NVIDIA provide any example of how to build extensions that rely on CUDA as a dependency?
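
This is roughly what that attempt looked like in bzl/cuda.bzl (a sketch of the idea, not the exact change I made):

    # Inside cuda_device_deps() in bzl/cuda.bzl (sketch; the actual change may have differed)
    cuda_hdrs = native.glob([
        cuda_include_prefix + "/*.h",
        cuda_include_prefix + "/*.hpp",
        cuda_include_prefix + "/CL/*.h",
        cuda_include_prefix + "/crt/*",
        cuda_include_prefix + "/cuda/std/**/*",
        # the headers named in the error, added both via wildcards and explicitly
        cuda_include_prefix + "/nv/*",
        cuda_include_prefix + "/nv/detail/*",
        cuda_include_prefix + "/nv/detail/__target_macros",
        cuda_include_prefix + "/nv/detail/__preprocessor",
    ])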

This seems to be just a Bazel issue and has nothing to do with DeepStream.

I’m not an expert on Bazel, but you can try this; it is the official solution for Bazel.

I am not using CUDA in my code directly; it is pulled in by GXF, which is a dependency of my code. I already tried adding this to my WORKSPACE, but it didn’t work.

Does NVIDIA provide any sample code where GXF is used inside a codelet?