Here are the Dockerfile contents:
# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Multistage build.
#
# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.01-py3-min
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
# DCGM version to install for Model Analyzer
ARG DCGM_VERSION=3.2.6
ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
ARG NVIDIA_BUILD_ID=unknown
############################################################################
## Build image
############################################################################
FROM ${BASE_IMAGE} AS sdk_build
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            ca-certificates \
            software-properties-common \
            autoconf \
            automake \
            build-essential \
            curl \
            git \
            gperf \
            libb64-dev \
            libgoogle-perftools-dev \
            libopencv-dev \
            libopencv-core-dev \
            libssl-dev \
            libtool \
            pkg-config \
            python3 \
            python3-pip \
            python3-dev \
            rapidjson-dev \
            vim \
            wget \
            python3-pdfkit \
            openjdk-11-jdk \
            maven && \
    pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade grpcio-tools && \
    pip3 install --upgrade pip
# Client build requires recent version of CMake (FetchContent required)
RUN apt update -q=2 \
    && apt install -y gpg wget \
    && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
    && . /etc/os-release \
    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
    && apt-get update -q=2 \
    && apt-get install -y --no-install-recommends cmake=3.27.7* cmake-data=3.27.7* \
    && cmake --version
# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
    ln -s /usr/bin/python3 /usr/bin/python
# Build the client library and examples
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
ARG TARGETPLATFORM
WORKDIR /workspace
COPY TRITON_VERSION .
COPY ${TRITON_CLIENT_REPO_SUBDIR} client
WORKDIR /workspace/build
RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
          -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
          -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
          -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
          -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
          -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
          -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
          -DTRITON_ENABLE_JAVA_HTTP=ON \
          -DTRITON_ENABLE_PERF_ANALYZER=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
          -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
          -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
          -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
RUN make -j16 cc-clients python-clients java-clients && \
    rm -fr ~/.m2
# Install Java API Bindings
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
      source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \
      --maven-version ${JAVA_BINDINGS_MAVEN_VERSION} \
      --core-tag ${TRITON_CORE_REPO_TAG} \
      --javacpp-tag ${JAVA_BINDINGS_JAVACPP_PRESETS_TAG} \
      --jar-install-path /workspace/install/java-api-bindings; \
    fi
############################################################################
## Create sdk container
############################################################################
FROM ${BASE_IMAGE}
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ARG DCGM_VERSION
ARG TRITON_CORE_REPO_TAG
ARG TARGETPLATFORM
ARG TRITON_ENABLE_GPU
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
            software-properties-common \
            curl \
            git \
            gperf \
            libb64-dev \
            libgoogle-perftools-dev \
            libopencv-dev \
            libopencv-core-dev \
            libssl-dev \
            libtool \
            python3 \
            python3-pip \
            python3-dev \
            vim \
            wget \
            python3-pdfkit \
            maven \
            default-jdk && \
    pip3 install --upgrade wheel setuptools && \
    pip3 install --upgrade grpcio-tools && \
    pip3 install --upgrade pip
WORKDIR /workspace
COPY TRITON_VERSION .
COPY NVIDIA_Deep_Learning_Container_License.pdf .
COPY --from=sdk_build /workspace/client/ client/
COPY --from=sdk_build /workspace/install/ install/
RUN cd install && \
    export VERSION=`cat /workspace/TRITON_VERSION` && \
    tar zcf /workspace/v$VERSION.clients.tar.gz *
# For CI testing need to copy over L0_sdk test and L0_client_build_variants test.
RUN mkdir qa
COPY qa/L0_sdk qa/L0_sdk
COPY qa/L0_client_build_variants qa/L0_client_build_variants
# Create a directory for all the python client tests to enable unit testing
RUN mkdir -p qa/python_client_unit_tests/
COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_client_unit_tests/
# Install an image needed by the quickstart and other documentation.
COPY qa/images/mug.jpg images/mug.jpg
# Install the dependencies needed to run the client examples. These
# are not needed for building but including them allows this image to
# be used to run the client examples.
RUN pip3 install --upgrade numpy pillow attrdict && \
    find install/python/ -maxdepth 1 -type f -name \
    "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
    xargs pip3 install --upgrade
# Install DCGM
RUN if [ “$TRITON_ENABLE_GPU” = “ON” ]; then \
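For reference, building an SDK image from a Dockerfile like this one would look roughly as follows. This is only a sketch: the image tag is made up, and it assumes the build context already contains the TRITON_VERSION file and a client checkout under clientrepo/, as the COPY lines above expect.

# Example only (not from the post): build the SDK image from this Dockerfile.
# The context must provide TRITON_VERSION and clientrepo/ for the COPY steps.
docker build -f Dockerfile.sdk \
    --build-arg TRITON_ENABLE_GPU=ON \
    -t tritonserver_sdk .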
Here is the output I get when running “/opt/tritonserver/clients/bin/perf_analyzer -m simple” after running “sudo /opt/tritonserver/bin/tritonserver --model-repository=./model_repository --backend-directory=/opt/tritonserver/backends --backend-config=tensorflow,version=2”:
test@test-desktop:~/Downloads$ /opt/tritonserver/clients/bin/perf_analyzer -m simple
*** Measurement Settings ***
Batch size: 1
Service Kind: Triton
Using "time_windows" mode for stabilization
Measurement window: 5000 msec
Using synchronous calls for inference
Stabilizing using average latency
Request concurrency: 1
Client:
Request count: 14871
Throughput: 825.774 infer/sec
Avg latency: 1208 usec (standard deviation 469 usec)
p50 latency: 1106 usec
p90 latency: 1439 usec
p95 latency: 1677 usec
p99 latency: 3128 usec
Avg HTTP time: 1194 usec (send/recv 181 usec + response wait 1013 usec)
Server:
Inference count: 14871
Execution count: 14871
Successful request count: 14871
Avg request latency: 542 usec (overhead 63 usec + queue 65 usec + compute input 32 usec + compute infer 362 usec + compute output 19 usec)
Inferences/Second vs. Client Average Batch Latency
Concurrency: 1, throughput: 825.774 infer/sec, latency 1208 usec
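If it helps to see how throughput scales beyond a single in-flight request, perf_analyzer can sweep several concurrency levels in one run; --concurrency-range is a standard perf_analyzer option, and the model name below is the same "simple" model used above.

# Example only: measure the same model at concurrency 1 through 4.
/opt/tritonserver/clients/bin/perf_analyzer -m simple --concurrency-range 1:4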