PART 1/2:
Hi rvinobha, thank you for the follow-up.
Confirming the config:
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# Architecture of target platform. Supported architectures: amd64, arm64
riva_target_arch="amd64"
# Legacy arm64 platform to be enabled. Supported legacy platforms: xavier
riva_arm64_legacy_platform=""
# Enable or Disable Riva Services
service_enabled_asr=true
service_enabled_nlp=false
service_enabled_tts=false
# Enable Riva Enterprise
# If enrolled in Enterprise, enable Riva Enterprise by setting configuration
# here. You must explicitly acknowledge you have read and agree to the EULA.
# RIVA_API_KEY=<ngc api key>
# RIVA_API_NGC_ORG=<ngc organization>
# RIVA_EULA=accept
# Language code to fetch models of a specified language
# Currently only ASR supports languages other than English
# Supported language codes: en-US, de-DE, es-US, ru-RU, zh-CN, hi-IN
# For any language other than English, set service_enabled_nlp and service_enabled_tts to false
# For multiple languages, enter space-separated language codes.
language_code=("en-US")
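# Example (not used in this deployment): multiple languages are space-separated, e.g.
# language_code=("en-US" "de-DE")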
# Specify one or more GPUs to use
# specifying more than one GPU is currently an experimental feature, and may result in undefined behaviours.
gpus_to_use="device=0"
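# Example (untested here, and experimental per the note above): selecting two GPUs would look
# something like gpus_to_use="device=0,1", following Docker's --gpus device-list syntax.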
# Specify the encryption key to use to deploy models
MODEL_DEPLOY_KEY="tlt_encode"
# Locations to use for storing models artifacts
#
# If an absolute path is specified, the data will be written to that location
# Otherwise, a docker volume will be used (default).
#
# riva_init.sh will create a `rmir` and `models` directory in the volume or
# path specified.
#
# RMIR ($riva_model_loc/rmir)
# Riva uses an intermediate representation (RMIR) for models
# that are ready to deploy but not yet fully optimized for deployment. Pretrained
# versions can be obtained from NGC (by specifying NGC models below) and will be
# downloaded to $riva_model_loc/rmir by `riva_init.sh`
#
# Custom models produced by NeMo or TLT and prepared using riva-build
# may also be copied manually to this location ($riva_model_loc/rmir).
#
# Models ($riva_model_loc/models)
# During the riva_init process, the RMIR files in $riva_model_loc/rmir
# are inspected and optimized for deployment. The optimized versions are
# stored in $riva_model_loc/models. The riva server exclusively uses these
# optimized versions.
riva_model_loc="riva-model-repo"
if [[ $riva_target_arch == "arm64" ]]; then
riva_model_loc="`pwd`/model_repository"
fi
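# Example (hypothetical host path): to keep model artifacts on the host instead of in a Docker
# volume, an absolute path could be used instead, e.g. riva_model_loc="/srv/riva/model-repo";
# riva_init.sh would then create rmir/ and models/ under that directory.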
# The pre-built RMIRs listed below are downloaded from NGC into $riva_model_loc/rmir by default.
# To skip the NGC download and use RMIRs already present in $riva_model_loc/rmir, set the
# $use_existing_rmirs flag below to true. Custom RMIRs can also be deployed by placing them in
# $riva_model_loc/rmir and running this quickstart with the flag set to true.
use_existing_rmirs=false
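# Example (not used here): to deploy custom riva-build output, copy the *.rmir files into
# $riva_model_loc/rmir and set use_existing_rmirs=true; one way to copy files into the Docker
# volume is sketched just after the config below.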
# Ports to expose for Riva services
riva_speech_api_port="50051"
# NGC orgs
riva_ngc_org="nvidia"
riva_ngc_team="riva"
riva_ngc_image_version="2.3.0"
riva_ngc_model_version="2.3.0"
# Pre-built models listed below will be downloaded from NGC. If models already exist in $riva_model_loc/rmir,
# the corresponding entries can be commented out to skip the download from NGC.
########## ASR MODELS ##########
models_asr=()
### Citrinet-1024 models
for lang_code in ${language_code[@]}; do
modified_lang_code="${lang_code/-/_}"
modified_lang_code=${modified_lang_code,,}
if [[ $riva_target_arch == "arm64" ]]; then
models_asr+=(
### Citrinet-1024 Streaming w/ CPU decoder, best latency configuration
"${riva_ngc_org}/${riva_ngc_team}/models_asr_citrinet_1024_${modified_lang_code}_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
)
else
models_asr+=(
### Citrinet-1024 Streaming w/ CPU decoder, best latency configuration
"${riva_ngc_org}/${riva_ngc_team}/rmir_asr_citrinet_1024_${modified_lang_code}_str:${riva_ngc_model_version}"
### Citrinet-1024 Streaming w/ CPU decoder, best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_citrinet_1024_${modified_lang_code}_str_thr:${riva_ngc_model_version}"
### Citrinet-1024 Offline w/ CPU decoder,
"${riva_ngc_org}/${riva_ngc_team}/rmir_asr_citrinet_1024_${modified_lang_code}_ofl:${riva_ngc_model_version}"
)
fi
### Punctuation model
if [[ "${lang_code}" == "en-US" || "${lang_code}" == "de-DE" || "${lang_code}" == "es-US" || "${lang_code}" == "zh-CN" ]]; then
if [[ $riva_target_arch == "arm64" ]]; then
models_asr+=(
"${riva_ngc_org}/${riva_ngc_team}/models_nlp_punctuation_bert_base_${modified_lang_code}:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
)
else
models_asr+=(
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_punctuation_bert_base_${modified_lang_code}:${riva_ngc_model_version}"
)
fi
fi
done
# Other ASR models
if [[ $riva_target_arch == "arm64" ]]; then
models_asr+=(
### Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_conformer_en_us_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### German Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_conformer_de_de_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### Spanish Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_conformer_es_us_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### Hindi Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_conformer_hi_in_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### Russian Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_conformer_ru_ru_str:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### Citrinet-256 Streaming w/ CPU decoder, best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/models_asr_citrinet_256_en_us_streaming:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
)
else
models_asr+=(
### Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_en_us_str:${riva_ngc_model_version}"
### Conformer acoustic model, CPU decoder, streaming best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_en_us_str_thr:${riva_ngc_model_version}"
### Conformer acoustic model, CPU decoder, offline configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_en_us_ofl:${riva_ngc_model_version}"
### German Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_de_de_str:${riva_ngc_model_version}"
### German Conformer acoustic model, CPU decoder, streaming best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_de_de_str_thr:${riva_ngc_model_version}"
### German Conformer acoustic model, CPU decoder, offline configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_de_de_ofl:${riva_ngc_model_version}"
### Spanish Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_es_us_str:${riva_ngc_model_version}"
### Spanish Conformer acoustic model, CPU decoder, streaming best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_es_us_str_thr:${riva_ngc_model_version}"
### Spanish Conformer acoustic model, CPU decoder, offline configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_es_us_ofl:${riva_ngc_model_version}"
### Hindi Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_hi_in_str:${riva_ngc_model_version}"
### Hindi Conformer acoustic model, CPU decoder, streaming best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_hi_in_str_thr:${riva_ngc_model_version}"
### Hindi Conformer acoustic model, CPU decoder, offline configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_hi_in_ofl:${riva_ngc_model_version}"
### Russian Conformer acoustic model, CPU decoder, streaming best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_ru_ru_str:${riva_ngc_model_version}"
### Russian Conformer acoustic model, CPU decoder, streaming best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_ru_ru_str_thr:${riva_ngc_model_version}"
### Russian Conformer acoustic model, CPU decoder, offline configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_conformer_ru_ru_ofl:${riva_ngc_model_version}"
### Jasper Streaming w/ CPU decoder, best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_str:${riva_ngc_model_version}"
### Jasper Streaming w/ CPU decoder, best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_str_thr:${riva_ngc_model_version}"
### Jasper Offline w/ CPU decoder
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_ofl:${riva_ngc_model_version}"
### QuartzNet Streaming w/ CPU decoder, best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_quartznet_en_us_str:${riva_ngc_model_version}"
### QuartzNet Streaming w/ CPU decoder, best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_quartznet_en_us_str_thr:${riva_ngc_model_version}"
### QuartzNet Offline w/ CPU decoder
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_quartznet_en_us_ofl:${riva_ngc_model_version}"
### Jasper Streaming w/ GPU decoder, best latency configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_str_gpu_decoder:${riva_ngc_model_version}"
### Jasper Streaming w/ GPU decoder, best throughput configuration
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_str_thr_gpu_decoder:${riva_ngc_model_version}"
### Jasper Offline w/ GPU decoder
# "${riva_ngc_org}/${riva_ngc_team}/rmir_asr_jasper_en_us_ofl_gpu_decoder:${riva_ngc_model_version}"
)
fi
########## NLP MODELS ##########
if [[ $riva_target_arch == "arm64" ]]; then
models_nlp=(
### BERT Base Intent Slot model for misty domain fine-tuned on weather, smalltalk/personality, poi/map datasets.
"${riva_ngc_org}/${riva_ngc_team}/models_nlp_intent_slot_misty_bert_base:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
### DistilBERT Intent Slot model for misty domain fine-tuned on weather, smalltalk/personality, poi/map datasets.
# "${riva_ngc_org}/${riva_ngc_team}/models_nlp_intent_slot_misty_distilbert:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
)
else
models_nlp=(
### Bert base Punctuation model
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_punctuation_bert_base_en_us:${riva_ngc_model_version}"
### BERT base Named Entity Recognition model fine-tuned on GMB dataset with class labels LOC, PER, ORG etc.
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_named_entity_recognition_bert_base:${riva_ngc_model_version}"
### BERT Base Intent Slot model fine-tuned on weather dataset.
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_intent_slot_bert_base:${riva_ngc_model_version}"
### BERT Base Question Answering model fine-tuned on Squad v2.
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_question_answering_bert_base:${riva_ngc_model_version}"
### Megatron345M Question Answering model fine-tuned on Squad v2.
# "${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_question_answering_megatron:${riva_ngc_model_version}"
### Bert base Text Classification model fine-tuned on 4class (weather, meteorology, personality, nomatch) domain model.
"${riva_ngc_org}/${riva_ngc_team}/rmir_nlp_text_classification_bert_base:${riva_ngc_model_version}"
)
fi
########## TTS MODELS ##########
if [[ $riva_target_arch == "arm64" ]]; then
models_tts=(
"${riva_ngc_org}/${riva_ngc_team}/models_tts_fastpitch_hifigan_en_us_female_1:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
# "${riva_ngc_org}/${riva_ngc_team}/models_tts_fastpitch_hifigan_en_us_male_1:${riva_ngc_model_version}-${riva_target_arch}${riva_arm64_legacy_platform}"
)
else
models_tts=(
"${riva_ngc_org}/${riva_ngc_team}/rmir_tts_fastpitch_hifigan_en_us_female_1:${riva_ngc_model_version}"
# "${riva_ngc_org}/${riva_ngc_team}/rmir_tts_fastpitch_hifigan_en_us_male_1:${riva_ngc_model_version}"
)
fi
NGC_TARGET=${riva_ngc_org}
if [[ ! -z ${riva_ngc_team} ]]; then
NGC_TARGET="${NGC_TARGET}/${riva_ngc_team}"
else
team="\"\""
fi
# Specify paths to the SSL key and certificate files to use TLS/SSL credentials for a secured connection.
# If either is empty, an insecure connection will be used.
# Stored within the container at /ssl/server.crt and /ssl/server.key
# Optionally, a root certificate can also be specified, stored within the container at /ssl/root_server.crt
ssl_server_cert=""
ssl_server_key=""
ssl_root_cert=""
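# Example (hypothetical host paths; TLS is not enabled in this deployment):
# ssl_server_cert="/etc/riva/ssl/server.crt"
# ssl_server_key="/etc/riva/ssl/server.key"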
# define docker images required to run Riva
image_client="nvcr.io/${NGC_TARGET}/riva-speech-client:${riva_ngc_image_version}"
image_speech_api="nvcr.io/${NGC_TARGET}/riva-speech:${riva_ngc_image_version}-server"
# define docker images required to setup Riva
image_init_speech="nvcr.io/${NGC_TARGET}/riva-speech:${riva_ngc_image_version}-servicemaker"
# daemon names
riva_daemon_speech="riva-speech"
if [[ $riva_target_arch != "arm64" ]]; then
riva_daemon_client="riva-client"
fi
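Since riva_model_loc above is a Docker volume name ("riva-model-repo") rather than a host path, a rough way to see what riva_init.sh placed in it is to mount the volume into a throwaway container. The alpine image and the my_custom.rmir filename below are only illustrative; the volume name should match riva_model_loc:
# docker volume inspect riva-model-repo
# docker run --rm -v riva-model-repo:/data alpine ls /data/rmir /data/models
# docker run --rm -v "$(pwd)":/src -v riva-model-repo:/data alpine cp /src/my_custom.rmir /data/rmir/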
Confirming the processing steps:
# bash riva_clean.sh
# bash riva_init.sh
# bash riva_start.sh
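For reference, the container state and the live startup stream can also be checked with plain Docker commands (nothing Riva-specific assumed here):
# docker ps --filter name=riva-speech
# docker logs -f riva-speech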
…and the resulting logs from the riva-speech container:
# docker logs riva-speech
==========================
=== Riva Speech Skills ===
==========================
NVIDIA Release 22.06 (build 40051835)
Riva Speech Server Version 2.3.0
Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Various files include modifications (c) NVIDIA CORPORATION & AFFILIATES. All rights reserved.
This container image and its contents are governed by the NVIDIA Deep Learning Container License.
By pulling and using the container, you accept the terms and conditions of this license:
https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license
NOTE: The SHMEM allocation limit is set to the default of 64MB. This may be
insufficient for Riva Speech Server. NVIDIA recommends the use of the following flags:
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 ...
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:20.996861 105 onnxruntime.cc:2319] TRITONBACKEND_Initialize: onnxruntime
I0829 20:04:20.997597 105 onnxruntime.cc:2329] Triton TRITONBACKEND API version: 1.8
I0829 20:04:20.997681 105 onnxruntime.cc:2335] 'onnxruntime' TRITONBACKEND API version: 1.8
I0829 20:04:20.997749 105 onnxruntime.cc:2365] backend configuration:
{}
I0829 20:04:21.511777 105 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x10020000000' with size 268435456
I0829 20:04:21.512156 105 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 1000000000
I0829 20:04:21.517792 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline:1
I0829 20:04:21.618142 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-offline-feature-extractor-offline:1
I0829 20:04:21.655977 105 ctc-decoder-library.cc:20] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0829 20:04:21.657289 111 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0829 20:04:21.657415 111 parameter_parser.cc:121] Default value will be used
W0829 20:04:21.657531 111 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0829 20:04:21.657573 111 parameter_parser.cc:121] Default value will be used
W0829 20:04:21.657608 111 parameter_parser.cc:120] Parameter max_num_slots could not be set from parameters
W0829 20:04:21.657655 111 parameter_parser.cc:121] Default value will be used
I0829 20:04:21.658139 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline",
"platform": "",
"backend": "riva_asr_decoder",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 128,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
1025
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "END_FLAG",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CUSTOM_CONFIGURATION",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "FINAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_TRANSCRIPTS_SCORE",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS_STABILITY",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 128,
"preferred_batch_size": [
32,
64
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"forerunner_beam_size_token": {
"string_value": "8"
},
"forerunner_beam_threshold": {
"string_value": "10.0"
},
"decoder_num_worker_threads": {
"string_value": "-1"
},
"asr_model_delay": {
"string_value": "-1"
},
"word_insertion_score": {
"string_value": "0.2"
},
"left_padding_size": {
"string_value": "0.0"
},
"decoder_type": {
"string_value": "flashlight"
},
"forerunner_beam_size": {
"string_value": "8"
},
"max_supported_transcripts": {
"string_value": "1"
},
"chunk_size": {
"string_value": "300.0"
},
"lexicon_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline/1/lexicon.txt"
},
"smearing_mode": {
"string_value": "max"
},
"use_vad": {
"string_value": "True"
},
"lm_weight": {
"string_value": "0.2"
},
"blank_token": {
"string_value": "#"
},
"vocab_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline/1/riva_decoder_vocabulary.txt"
},
"ms_per_timestep": {
"string_value": "80"
},
"streaming": {
"string_value": "False"
},
"use_subword": {
"string_value": "True"
},
"beam_size": {
"string_value": "16"
},
"right_padding_size": {
"string_value": "0.0"
},
"beam_size_token": {
"string_value": "16"
},
"sil_token": {
"string_value": "▁"
},
"num_tokenization": {
"string_value": "1"
},
"beam_threshold": {
"string_value": "20.0"
},
"language_model_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline/1/jarvis_asr_train_datasets_noSpgi_noLS_gt_3gram.binary"
},
"tokenizer_model": {
"string_value": "/data/models/citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline/1/498056ba420d4bb3831ad557fba06032_tokenizer.model"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"forerunner_use_lm": {
"string_value": "true"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:21.659291 105 ctc-decoder-library.cc:23] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline_0 (device 0)
I0829 20:04:21.718440 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline:1
I0829 20:04:21.818839 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming:1
I0829 20:04:21.921363 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-streaming-feature-extractor-streaming:1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:22.021788 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming:1
I0829 20:04:22.122154 105 model_repository_manager.cc:994] loading: riva-punctuation-en-US:1
I0829 20:04:22.222552 105 model_repository_manager.cc:994] loading: riva-trt-citrinet-1024-en-US-asr-offline-am-offline:1
I0829 20:04:22.322946 105 model_repository_manager.cc:994] loading: riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming:1
I0829 20:04:22.423352 105 model_repository_manager.cc:994] loading: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased:1
I0829 20:04:22.539752 111 ctc-decoder.cc:171] Beam Decoder initialized successfully!
I0829 20:04:22.540524 105 feature-extractor.cc:407] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-offline-feature-extractor-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0829 20:04:22.541316 112 parameter_parser.cc:120] Parameter is_dither_seed_random could not be set from parameters
W0829 20:04:22.541436 112 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.541481 112 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0829 20:04:22.541538 112 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.541584 112 parameter_parser.cc:120] Parameter max_sequence_idle_microseconds could not be set from parameters
W0829 20:04:22.541615 112 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.541668 112 parameter_parser.cc:120] Parameter preemph_coeff could not be set from parameters
W0829 20:04:22.541703 112 parameter_parser.cc:121] Default value will be used
I0829 20:04:22.547923 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline' version 1
I0829 20:04:22.691245 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-offline-feature-extractor-offline",
"platform": "",
"backend": "riva_asr_features",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1,
"input": [
{
"name": "AUDIO_SIGNAL",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SAMPLE_RATE",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "AUDIO_FEATURES",
"data_type": "TYPE_FP32",
"dims": [
80,
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "AUDIO_PROCESSED",
"data_type": "TYPE_FP32",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1,
"preferred_batch_size": [
1
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-offline-feature-extractor-offline_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"gain": {
"string_value": "1.0"
},
"use_utterance_norm_params": {
"string_value": "False"
},
"precalc_norm_time_steps": {
"string_value": "0"
},
"precalc_norm_params": {
"string_value": "False"
},
"dither": {
"string_value": "0.0"
},
"norm_per_feature": {
"string_value": "True"
},
"mean": {
"string_value": "-11.4412, -9.9334, -9.1292, -9.0365, -9.2804, -9.5643, -9.7342, -9.6925, -9.6333, -9.2808, -9.1887, -9.1422, -9.1397, -9.2028, -9.2749, -9.4776, -9.9185, -10.1557, -10.3800, -10.5067, -10.3190, -10.4728, -10.5529, -10.6402, -10.6440, -10.5113, -10.7395, -10.7870, -10.6074, -10.5033, -10.8278, -10.6384, -10.8481, -10.6875, -10.5454, -10.4747, -10.5165, -10.4930, -10.3413, -10.3472, -10.3735, -10.6830, -10.8813, -10.6338, -10.3856, -10.7727, -10.8957, -10.8068, -10.7373, -10.6108, -10.3405, -10.2889, -10.3922, -10.4946, -10.3367, -10.4164, -10.9949, -10.7196, -10.3971, -10.1734, -9.9257, -9.6557, -9.1761, -9.6653, -9.7876, -9.7230, -9.7792, -9.7056, -9.2702, -9.4650, -9.2755, -9.1369, -9.1174, -8.9197, -8.5394, -8.2614, -8.1353, -8.1422, -8.3430, -8.6655"
},
"stddev": {
"string_value": "2.2668, 3.1642, 3.7079, 3.7642, 3.5349, 3.5901, 3.7640, 3.8424, 4.0145, 4.1475, 4.0457, 3.9048, 3.7709, 3.6117, 3.3188, 3.1489, 3.0615, 3.0362, 2.9929, 3.0500, 3.0341, 3.0484, 3.0103, 2.9474, 2.9128, 2.8669, 2.8332, 2.9411, 3.0378, 3.0712, 3.0190, 2.9992, 3.0124, 3.0024, 3.0275, 3.0870, 3.0656, 3.0142, 3.0493, 3.1373, 3.1135, 3.0675, 2.8828, 2.7018, 2.6296, 2.8826, 2.9325, 2.9288, 2.9271, 2.9890, 3.0137, 2.9855, 3.0839, 2.9319, 2.3512, 2.3795, 2.6191, 2.7555, 2.9326, 2.9931, 3.1543, 3.0855, 2.6820, 3.0566, 3.1272, 3.1663, 3.1836, 3.0018, 2.9089, 3.1727, 3.1626, 3.1086, 2.9804, 3.1107, 3.2998, 3.3697, 3.3716, 3.2487, 3.1597, 3.1181"
},
"chunk_size": {
"string_value": "300.0"
},
"max_execution_batch_size": {
"string_value": "1"
},
"sample_rate": {
"string_value": "16000"
},
"window_stride": {
"string_value": "0.01"
},
"window_size": {
"string_value": "0.025"
},
"num_features": {
"string_value": "80"
},
"streaming": {
"string_value": "False"
},
"left_padding_size": {
"string_value": "0.0"
},
"stddev_floor": {
"string_value": "1e-05"
},
"transpose": {
"string_value": "False"
},
"right_padding_size": {
"string_value": "0.0"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:22.692584 105 vad_library.cc:18] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0829 20:04:22.693289 113 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0829 20:04:22.693377 113 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.693456 113 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0829 20:04:22.693511 113 parameter_parser.cc:121] Default value will be used
I0829 20:04:22.693940 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline",
"platform": "",
"backend": "riva_asr_vad",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 2048,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
1025
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"chunk_size": {
"string_value": "300.0"
},
"vad_start_th": {
"string_value": "0.2"
},
"vad_stop_th": {
"string_value": "0.98"
},
"vad_type": {
"string_value": "ctc-vad"
},
"vocab_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline/1/riva_decoder_vocabulary.txt"
},
"residue_blanks_at_start": {
"string_value": "0"
},
"ms_per_timestep": {
"string_value": "80"
},
"streaming": {
"string_value": "False"
},
"use_subword": {
"string_value": "True"
},
"residue_blanks_at_end": {
"string_value": "0"
},
"vad_stop_history": {
"string_value": "800"
},
"vad_start_history": {
"string_value": "300"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:22.694467 105 ctc-decoder-library.cc:20] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming (version 1)
W0829 20:04:22.695189 114 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0829 20:04:22.695247 114 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.695338 114 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0829 20:04:22.695382 114 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.695416 114 parameter_parser.cc:120] Parameter max_num_slots could not be set from parameters
W0829 20:04:22.695467 114 parameter_parser.cc:121] Default value will be used
I0829 20:04:22.695963 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming",
"platform": "",
"backend": "riva_asr_decoder",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1024,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
1025
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "END_FLAG",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CUSTOM_CONFIGURATION",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "FINAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_TRANSCRIPTS_SCORE",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS_STABILITY",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1024,
"preferred_batch_size": [
32,
64
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"forerunner_beam_size_token": {
"string_value": "8"
},
"forerunner_beam_threshold": {
"string_value": "10.0"
},
"asr_model_delay": {
"string_value": "-1"
},
"decoder_num_worker_threads": {
"string_value": "-1"
},
"word_insertion_score": {
"string_value": "0.2"
},
"left_padding_size": {
"string_value": "1.92"
},
"decoder_type": {
"string_value": "flashlight"
},
"forerunner_beam_size": {
"string_value": "8"
},
"chunk_size": {
"string_value": "0.16"
},
"max_supported_transcripts": {
"string_value": "1"
},
"lexicon_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/lexicon.txt"
},
"smearing_mode": {
"string_value": "max"
},
"use_vad": {
"string_value": "True"
},
"lm_weight": {
"string_value": "0.2"
},
"blank_token": {
"string_value": "#"
},
"vocab_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/riva_decoder_vocabulary.txt"
},
"ms_per_timestep": {
"string_value": "80"
},
"streaming": {
"string_value": "True"
},
"use_subword": {
"string_value": "True"
},
"beam_size": {
"string_value": "16"
},
"right_padding_size": {
"string_value": "1.92"
},
"beam_size_token": {
"string_value": "16"
},
"sil_token": {
"string_value": "▁"
},
"num_tokenization": {
"string_value": "1"
},
"beam_threshold": {
"string_value": "20.0"
},
"language_model_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/jarvis_asr_train_datasets_noSpgi_noLS_gt_3gram.binary"
},
"tokenizer_model": {
"string_value": "/data/models/citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/498056ba420d4bb3831ad557fba06032_tokenizer.model"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"forerunner_use_lm": {
"string_value": "true"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:22.696992 105 vad_library.cc:18] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming (version 1)
W0829 20:04:22.697497 119 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0829 20:04:22.697556 119 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.697623 119 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0829 20:04:22.697667 119 parameter_parser.cc:121] Default value will be used
I0829 20:04:22.698071 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming",
"platform": "",
"backend": "riva_asr_vad",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 2048,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
1025
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"chunk_size": {
"string_value": "0.16"
},
"vad_start_th": {
"string_value": "0.2"
},
"vad_stop_th": {
"string_value": "0.98"
},
"vad_type": {
"string_value": "ctc-vad"
},
"vocab_file": {
"string_value": "/data/models/citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming/1/riva_decoder_vocabulary.txt"
},
"ms_per_timestep": {
"string_value": "80"
},
"residue_blanks_at_start": {
"string_value": "-2"
},
"streaming": {
"string_value": "True"
},
"use_subword": {
"string_value": "True"
},
"residue_blanks_at_end": {
"string_value": "0"
},
"vad_stop_history": {
"string_value": "800"
},
"vad_start_history": {
"string_value": "300"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:22.705331 105 feature-extractor.cc:407] TRITONBACKEND_ModelInitialize: citrinet-1024-en-US-asr-streaming-feature-extractor-streaming (version 1)
W0829 20:04:22.706038 118 parameter_parser.cc:120] Parameter is_dither_seed_random could not be set from parameters
W0829 20:04:22.706099 118 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.706149 118 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0829 20:04:22.706193 118 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.706225 118 parameter_parser.cc:120] Parameter max_sequence_idle_microseconds could not be set from parameters
W0829 20:04:22.706276 118 parameter_parser.cc:121] Default value will be used
W0829 20:04:22.706313 118 parameter_parser.cc:120] Parameter preemph_coeff could not be set from parameters
W0829 20:04:22.706364 118 parameter_parser.cc:121] Default value will be used
I0829 20:04:22.721384 105 backend_model.cc:255] model configuration:
{
"name": "citrinet-1024-en-US-asr-streaming-feature-extractor-streaming",
"platform": "",
"backend": "riva_asr_features",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1024,
"input": [
{
"name": "AUDIO_SIGNAL",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SAMPLE_RATE",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "AUDIO_FEATURES",
"data_type": "TYPE_FP32",
"dims": [
80,
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "AUDIO_PROCESSED",
"data_type": "TYPE_FP32",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1024,
"preferred_batch_size": [
256,
512
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "citrinet-1024-en-US-asr-streaming-feature-extractor-streaming_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"streaming": {
"string_value": "True"
},
"stddev_floor": {
"string_value": "1e-05"
},
"transpose": {
"string_value": "False"
},
"left_padding_size": {
"string_value": "1.92"
},
"right_padding_size": {
"string_value": "1.92"
},
"gain": {
"string_value": "1.0"
},
"use_utterance_norm_params": {
"string_value": "False"
},
"precalc_norm_time_steps": {
"string_value": "0"
},
"dither": {
"string_value": "1e-05"
},
"precalc_norm_params": {
"string_value": "False"
},
"norm_per_feature": {
"string_value": "True"
},
"mean": {
"string_value": "-11.4412, -9.9334, -9.1292, -9.0365, -9.2804, -9.5643, -9.7342, -9.6925, -9.6333, -9.2808, -9.1887, -9.1422, -9.1397, -9.2028, -9.2749, -9.4776, -9.9185, -10.1557, -10.3800, -10.5067, -10.3190, -10.4728, -10.5529, -10.6402, -10.6440, -10.5113, -10.7395, -10.7870, -10.6074, -10.5033, -10.8278, -10.6384, -10.8481, -10.6875, -10.5454, -10.4747, -10.5165, -10.4930, -10.3413, -10.3472, -10.3735, -10.6830, -10.8813, -10.6338, -10.3856, -10.7727, -10.8957, -10.8068, -10.7373, -10.6108, -10.3405, -10.2889, -10.3922, -10.4946, -10.3367, -10.4164, -10.9949, -10.7196, -10.3971, -10.1734, -9.9257, -9.6557, -9.1761, -9.6653, -9.7876, -9.7230, -9.7792, -9.7056, -9.2702, -9.4650, -9.2755, -9.1369, -9.1174, -8.9197, -8.5394, -8.2614, -8.1353, -8.1422, -8.3430, -8.6655"
},
"stddev": {
"string_value": "2.2668, 3.1642, 3.7079, 3.7642, 3.5349, 3.5901, 3.7640, 3.8424, 4.0145, 4.1475, 4.0457, 3.9048, 3.7709, 3.6117, 3.3188, 3.1489, 3.0615, 3.0362, 2.9929, 3.0500, 3.0341, 3.0484, 3.0103, 2.9474, 2.9128, 2.8669, 2.8332, 2.9411, 3.0378, 3.0712, 3.0190, 2.9992, 3.0124, 3.0024, 3.0275, 3.0870, 3.0656, 3.0142, 3.0493, 3.1373, 3.1135, 3.0675, 2.8828, 2.7018, 2.6296, 2.8826, 2.9325, 2.9288, 2.9271, 2.9890, 3.0137, 2.9855, 3.0839, 2.9319, 2.3512, 2.3795, 2.6191, 2.7555, 2.9326, 2.9931, 3.1543, 3.0855, 2.6820, 3.0566, 3.1272, 3.1663, 3.1836, 3.0018, 2.9089, 3.1727, 3.1626, 3.1086, 2.9804, 3.1107, 3.2998, 3.3697, 3.3716, 3.2487, 3.1597, 3.1181"
},
"chunk_size": {
"string_value": "0.16"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"sample_rate": {
"string_value": "16000"
},
"window_stride": {
"string_value": "0.01"
},
"window_size": {
"string_value": "0.025"
},
"num_features": {
"string_value": "80"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0829 20:04:22.722161 105 vad_library.cc:21] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming_0 (device 0)
I0829 20:04:22.836742 105 feature-extractor.cc:409] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-offline-feature-extractor-offline_0 (device 0)
I0829 20:04:22.844713 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:26.193126 105 ctc-decoder-library.cc:23] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming_0 (device 0)
I0829 20:04:26.199879 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-offline-feature-extractor-offline' version 1
I0829 20:04:27.056668 114 ctc-decoder.cc:171] Beam Decoder initialized successfully!
I0829 20:04:27.056878 105 vad_library.cc:21] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline_0 (device 0)
I0829 20:04:27.064029 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:27.188417 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline' version 1
I0829 20:04:27.212304 105 pipeline_library.cc:19] TRITONBACKEND_ModelInitialize: riva-punctuation-en-US (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0829 20:04:27.212878 120 parameter_parser.cc:120] Parameter bos could not be set from parameters
W0829 20:04:27.212966 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213021 120 parameter_parser.cc:120] Parameter dropout_prob could not be set from parameters
W0829 20:04:27.213052 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213101 120 parameter_parser.cc:120] Parameter eos could not be set from parameters
W0829 20:04:27.213138 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213184 120 parameter_parser.cc:120] Parameter reverse could not be set from parameters
W0829 20:04:27.213222 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213304 120 parameter_parser.cc:120] Parameter bos could not be set from parameters
W0829 20:04:27.213346 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213378 120 parameter_parser.cc:120] Parameter doc_stride could not be set from parameters
W0829 20:04:27.213426 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213460 120 parameter_parser.cc:120] Parameter dropout_prob could not be set from parameters
W0829 20:04:27.213513 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213562 120 parameter_parser.cc:120] Parameter eos could not be set from parameters
W0829 20:04:27.213593 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213649 120 parameter_parser.cc:120] Parameter margin could not be set from parameters
W0829 20:04:27.213680 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213728 120 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0829 20:04:27.213762 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213804 120 parameter_parser.cc:120] Parameter max_query_length could not be set from parameters
W0829 20:04:27.213842 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213873 120 parameter_parser.cc:120] Parameter max_seq_length could not be set from parameters
W0829 20:04:27.213920 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.213956 120 parameter_parser.cc:120] Parameter reverse could not be set from parameters
W0829 20:04:27.213999 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.214037 120 parameter_parser.cc:120] Parameter step could not be set from parameters
W0829 20:04:27.214068 120 parameter_parser.cc:121] Default value will be used
W0829 20:04:27.214116 120 parameter_parser.cc:120] Parameter task could not be set from parameters
W0829 20:04:27.214149 120 parameter_parser.cc:121] Default value will be used
I0829 20:04:27.214242 105 backend_model.cc:255] model configuration:
{
"name": "riva-punctuation-en-US",
"platform": "",
"backend": "riva_nlp_pipeline",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 8,
"input": [
{
"name": "PIPELINE_INPUT",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "PIPELINE_OUTPUT",
"data_type": "TYPE_STRING",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "riva-punctuation-en-US_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"punct_logits_tensor_name": {
"string_value": "punct_token_logits"
},
"language_code": {
"string_value": "en-US"
},
"tokenizer": {
"string_value": "wordpiece"
},
"delimiter": {
"string_value": " "
},
"input_ids_tensor_name": {
"string_value": "input_ids"
},
"model_name": {
"string_value": "riva-trt-riva-punctuation-en-US-nn-bert-base-uncased"
},
"pad_chars_with_spaces": {
"string_value": "False"
},
"remove_spaces": {
"string_value": "False"
},
"tokenizer_to_lower": {
"string_value": "true"
},
"model_family": {
"string_value": "riva"
},
"unk_token": {
"string_value": "[UNK]"
},
"vocab": {
"string_value": "/data/models/riva-punctuation-en-US/1/tokenizer.vocab_file"
},
"bos_token": {
"string_value": "[CLS]"
},
"capit_logits_tensor_name": {
"string_value": "capit_token_logits"
},
"punctuation_mapping_path": {
"string_value": "/data/models/riva-punctuation-en-US/1/punct_label_ids.csv"
},
"model_api": {
"string_value": "/nvidia.riva.nlp.RivaLanguageUnderstanding/PunctuateText"
},
"pipeline_type": {
"string_value": "punctuation"
},
"to_lower": {
"string_value": "true"
},
"eos_token": {
"string_value": "[SEP]"
},
"capitalization_mapping_path": {
"string_value": "/data/models/riva-punctuation-en-US/1/capit_label_ids.csv"
},
"load_model": {
"string_value": "false"
},
"attn_mask_tensor_name": {
"string_value": "input_mask"
},
"token_type_tensor_name": {
"string_value": "segment_ids"
}
},
"model_warmup": []
}
I0829 20:04:27.214900 105 pipeline_library.cc:22] TRITONBACKEND_ModelInstanceInitialize: riva-punctuation-en-US_0 (device 0)
I0829 20:04:27.225177 105 feature-extractor.cc:409] TRITONBACKEND_ModelInstanceInitialize: citrinet-1024-en-US-asr-streaming-feature-extractor-streaming_0 (device 0)
I0829 20:04:27.231758 105 model_repository_manager.cc:1149] successfully loaded 'riva-punctuation-en-US' version 1
I0829 20:04:27.509611 105 tensorrt.cc:5145] TRITONBACKEND_Initialize: tensorrt
I0829 20:04:27.509821 105 tensorrt.cc:5155] Triton TRITONBACKEND API version: 1.8
I0829 20:04:27.509900 105 tensorrt.cc:5161] 'tensorrt' TRITONBACKEND API version: 1.8
I0829 20:04:27.510119 105 tensorrt.cc:5204] backend configuration:
{}
I0829 20:04:27.510242 105 tensorrt.cc:5256] TRITONBACKEND_ModelInitialize: riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming (version 1)
I0829 20:04:27.510878 105 tensorrt.cc:5256] TRITONBACKEND_ModelInitialize: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased (version 1)
I0829 20:04:27.511426 105 tensorrt.cc:5305] TRITONBACKEND_ModelInstanceInitialize: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased_0 (GPU device 0)
I0829 20:04:27.520126 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-streaming-feature-extractor-streaming' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:29.346582 105 logging.cc:49] [MemUsageChange] Init CUDA: CPU +417, GPU +0, now: CPU 2067, GPU 3606 (MiB)
I0829 20:04:29.597220 105 logging.cc:49] Loaded engine size: 208 MiB
I0829 20:04:29.764864 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2598, GPU 3948 (MiB)
I0829 20:04:30.001373 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +126, GPU +58, now: CPU 2724, GPU 4006 (MiB)
I0829 20:04:30.005638 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +94, now: CPU 0, GPU 94 (MiB)
I0829 20:04:30.028823 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2308, GPU 3998 (MiB)
I0829 20:04:30.029769 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 2308, GPU 4006 (MiB)
I0829 20:04:30.119911 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +108, now: CPU 0, GPU 202 (MiB)
I0829 20:04:30.120289 105 tensorrt.cc:1409] Created instance riva-trt-riva-punctuation-en-US-nn-bert-base-uncased_0 on GPU 0 with stream priority 0 and optimization profile default[0];
I0829 20:04:30.120401 105 tensorrt.cc:5256] TRITONBACKEND_ModelInitialize: riva-trt-citrinet-1024-en-US-asr-offline-am-offline (version 1)
I0829 20:04:30.121026 105 tensorrt.cc:5305] TRITONBACKEND_ModelInstanceInitialize: riva-trt-citrinet-1024-en-US-asr-offline-am-offline_0 (GPU device 0)
I0829 20:04:30.121565 105 logging.cc:49] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 2421, GPU 4258 (MiB)
I0829 20:04:30.127830 105 model_repository_manager.cc:1149] successfully loaded 'riva-trt-riva-punctuation-en-US-nn-bert-base-uncased' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:30.461346 105 logging.cc:49] Loaded engine size: 283 MiB
I0829 20:04:30.744502 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2997, GPU 4550 (MiB)
I0829 20:04:30.745890 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +10, now: CPU 2997, GPU 4560 (MiB)
I0829 20:04:30.748526 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +281, now: CPU 0, GPU 483 (MiB)
I0829 20:04:30.779644 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2430, GPU 4552 (MiB)
I0829 20:04:30.780572 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 2430, GPU 4560 (MiB)
I0829 20:04:30.816233 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +565, now: CPU 0, GPU 1048 (MiB)
I0829 20:04:30.817876 105 tensorrt.cc:1409] Created instance riva-trt-citrinet-1024-en-US-asr-offline-am-offline_0 on GPU 0 with stream priority 0 and optimization profile default[0];
I0829 20:04:30.818022 105 tensorrt.cc:5305] TRITONBACKEND_ModelInstanceInitialize: riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming_0 (GPU device 0)
I0829 20:04:30.818513 105 logging.cc:49] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 2439, GPU 5178 (MiB)
I0829 20:04:30.823808 105 model_repository_manager.cc:1149] successfully loaded 'riva-trt-citrinet-1024-en-US-asr-offline-am-offline' version 1
I0829 20:04:31.156220 105 logging.cc:49] Loaded engine size: 277 MiB
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:31.447480 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 3003, GPU 5464 (MiB)
I0829 20:04:31.448846 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +1, GPU +10, now: CPU 3004, GPU 5474 (MiB)
I0829 20:04:31.451456 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +1, GPU +275, now: CPU 1, GPU 1323 (MiB)
I0829 20:04:31.482080 105 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2448, GPU 5466 (MiB)
I0829 20:04:31.483059 105 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 2448, GPU 5474 (MiB)
I0829 20:04:31.490937 105 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +532, now: CPU 1, GPU 1855 (MiB)
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:34.159140 105 tensorrt.cc:1409] Created instance riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming_0 on GPU 0 with stream priority 0 and optimization profile default[0];
I0829 20:04:34.159613 105 model_repository_manager.cc:1149] successfully loaded 'riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming' version 1
I0829 20:04:34.160377 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-offline:1
I0829 20:04:34.260745 105 model_repository_manager.cc:994] loading: citrinet-1024-en-US-asr-streaming:1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0829 20:04:34.360959 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-offline' version 1
I0829 20:04:34.361246 105 model_repository_manager.cc:1149] successfully loaded 'citrinet-1024-en-US-asr-streaming' version 1
I0829 20:04:34.361409 105 server.cc:522]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0829 20:04:34.361562 105 server.cc:549]
+-------------------+-----------------------------------------------------------------------------+--------+
| Backend | Path | Config |
+-------------------+-----------------------------------------------------------------------------+--------+
| onnxruntime | /opt/tritonserver/backends/onnxruntime/libtriton_onnxruntime.so | {} |
| riva_asr_decoder | /opt/tritonserver/backends/riva_asr_decoder/libtriton_riva_asr_decoder.so | {} |
| tensorrt | /opt/tritonserver/backends/tensorrt/libtriton_tensorrt.so | {} |
| riva_asr_vad | /opt/tritonserver/backends/riva_asr_vad/libtriton_riva_asr_vad.so | {} |
| riva_asr_features | /opt/tritonserver/backends/riva_asr_features/libtriton_riva_asr_features.so | {} |
| riva_nlp_pipeline | /opt/tritonserver/backends/riva_nlp_pipeline/libtriton_riva_nlp_pipeline.so | {} |
+-------------------+-----------------------------------------------------------------------------+--------+
I0829 20:04:34.361768 105 server.cc:592]
+-------------------------------------------------------------------------+---------+--------+
| Model | Version | Status |
+-------------------------------------------------------------------------+---------+--------+
| citrinet-1024-en-US-asr-offline | 1 | READY |
| citrinet-1024-en-US-asr-offline-ctc-decoder-cpu-offline | 1 | READY |
| citrinet-1024-en-US-asr-offline-feature-extractor-offline | 1 | READY |
| citrinet-1024-en-US-asr-offline-voice-activity-detector-ctc-offline | 1 | READY |
| citrinet-1024-en-US-asr-streaming | 1 | READY |
| citrinet-1024-en-US-asr-streaming-ctc-decoder-cpu-streaming | 1 | READY |
| citrinet-1024-en-US-asr-streaming-feature-extractor-streaming | 1 | READY |
| citrinet-1024-en-US-asr-streaming-voice-activity-detector-ctc-streaming | 1 | READY |
| riva-punctuation-en-US | 1 | READY |
| riva-trt-citrinet-1024-en-US-asr-offline-am-offline | 1 | READY |
| riva-trt-citrinet-1024-en-US-asr-streaming-am-streaming | 1 | READY |
| riva-trt-riva-punctuation-en-US-nn-bert-base-uncased | 1 | READY |
+-------------------------------------------------------------------------+---------+--------+
I0829 20:04:34.375394 105 metrics.cc:623] Collecting metrics for GPU 0: GRID A100D-8C
I0829 20:04:34.375790 105 tritonserver.cc:1932]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.19.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /data/models |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 1000000000 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0829 20:04:34.376740 105 grpc_server.cc:4375] Started GRPCInferenceService at 0.0.0.0:8001
I0829 20:04:34.377056 105 http_server.cc:3075] Started HTTPService at 0.0.0.0:8000
I0829 20:04:34.418180 105 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002
> Triton server is ready...
I0829 20:04:35.298982 267 riva_server.cc:118] Using Insecure Server Credentials
I0829 20:04:35.302084 267 model_registry.cc:112] Successfully registered: citrinet-1024-en-US-asr-offline for ASR
I0829 20:04:35.305709 267 model_registry.cc:112] Successfully registered: citrinet-1024-en-US-asr-streaming for ASR
I0829 20:04:35.327199 267 model_registry.cc:112] Successfully registered: riva-punctuation-en-US for NLP
W0829 20:04:35.377905 105 metrics.cc:401] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0829 20:04:35.378040 105 metrics.cc:419] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0829 20:04:35.378113 105 metrics.cc:443] Unable to get energy consumption for GPU 0. Status:Success, value:0
I0829 20:04:35.397414 267 riva_server.cc:158] Riva Conversational AI Server listening on 0.0.0.0:50051
W0829 20:04:35.397500 267 stats_reporter.cc:41] No API key provided. Stats reporting disabled.
W0829 20:04:36.378317 105 metrics.cc:401] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0829 20:04:36.378600 105 metrics.cc:419] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0829 20:04:36.378641 105 metrics.cc:443] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0829 20:04:37.380072 105 metrics.cc:401] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0829 20:04:37.380270 105 metrics.cc:419] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0829 20:04:37.380347 105 metrics.cc:443] Unable to get energy consumption for GPU 0. Status:Success, value:0
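From the model table above, all twelve models report READY, Triton is serving on ports 8000/8001/8002 inside the container, and the Riva speech API is listening on 50051, so the server side of this startup looks healthy. As a quick sanity check from the host, something like the minimal sketch below can confirm both endpoints respond (assumptions: grpcio is installed, port 50051 is reachable from where the script runs, and the Triton HTTP port 8000 is normally only reachable from inside the riva-speech container):

# Minimal readiness check (a sketch, not part of the quickstart scripts).
# Assumptions: grpcio is installed (pip install grpcio), the Riva speech API
# is reachable on localhost:50051 as shown in the log above, and the Triton
# HTTP port 8000 may only be reachable from inside the riva-speech container.
import urllib.request
import grpc

# 1) Riva speech API: wait for the gRPC channel on 50051 to become ready.
channel = grpc.insecure_channel("localhost:50051")
try:
    grpc.channel_ready_future(channel).result(timeout=10)
    print("Riva gRPC endpoint on 50051 is ready")
except grpc.FutureTimeoutError:
    print("Riva gRPC endpoint on 50051 did not become ready in time")

# 2) Triton readiness (standard Triton/KServe HTTP endpoint); returns 200
#    only once every model in the table above is READY.
try:
    with urllib.request.urlopen("http://localhost:8000/v2/health/ready", timeout=5) as resp:
        print("Triton /v2/health/ready ->", resp.status)
except Exception as exc:  # expected if port 8000 is not published outside the container
    print("Triton HTTP port not reachable from here:", exc)

If the gRPC check passes, the quickstart's riva_start_client.sh and the sample ASR clients it launches can then be used for an end-to-end transcription test against citrinet-1024-en-US-asr-streaming / -offline.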