Per your request, here is the output of the commands you asked for:
docker volume inspect riva-model-repo:
root@tengine:~/riva_quickstart_v2.4.0# docker volume inspect riva-model-repo
[
{
"CreatedAt": "2022-08-23T17:44:14Z",
"Driver": "local",
"Labels": null,
"Mountpoint": "/var/lib/docker/volumes/riva-model-repo/_data",
"Name": "riva-model-repo",
"Options": null,
"Scope": "local"
}
]
ls of the models directory inside the volume (/var/lib/docker/volumes/riva-model-repo/_data/models/):
riva_quickstart_v2.4.0# ls /var/lib/docker/volumes/riva-model-repo/_data/models/
conformer-en-US-asr-offline conformer-en-US-asr-streaming-feature-extractor-streaming
conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming
conformer-en-US-asr-offline-feature-extractor-streaming-offline riva-punctuation-en-US
conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline riva-trt-conformer-en-US-asr-offline-am-streaming-offline
conformer-en-US-asr-streaming riva-trt-conformer-en-US-asr-streaming-am-streaming
conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming riva-trt-riva-punctuation-en-US-nn-bert-base-uncased
docker logs riva-speech:
root@tengine:~/riva_quickstart_v2.4.0# docker logs riva-speech
Waiting for Riva server to load all models...retrying in 10 seconds
Waiting for Riva server to load all models...retrying in 10 seconds
Health ready check failed.
Check Riva logs with: docker logs riva-speech
root@tengine:~/riva_quickstart_v2.4.0# docker logs riva-speech
==========================
=== Riva Speech Skills ===
==========================
NVIDIA Release 22.07 (build 42007163)
Riva Speech Server Version 2.4.0
Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Various files include modifications (c) NVIDIA CORPORATION & AFFILIATES. All rights reserved.
This container image and its contents are governed by the NVIDIA Deep Learning Container License.
By pulling and using the container, you accept the terms and conditions of this license:
https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:02.207992 101 onnxruntime.cc:2400] TRITONBACKEND_Initialize: onnxruntime
I0824 23:02:02.208177 101 onnxruntime.cc:2410] Triton TRITONBACKEND API version: 1.9
I0824 23:02:02.208487 101 onnxruntime.cc:2416] 'onnxruntime' TRITONBACKEND API version: 1.9
I0824 23:02:02.208554 101 onnxruntime.cc:2446] backend configuration:
{}
I0824 23:02:02.712583 101 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x10020000000' with size 268435456
I0824 23:02:02.712969 101 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 1000000000
E0824 23:02:02.719851 101 model_repository_manager.cc:2064] Poll failed for model directory 'riva-trt-conformer-en-US-asr-streaming-am-streaming': failed to open text file for read /data/models/riva-trt-conformer-en-US-asr-streaming-am-streaming/config.pbtxt: No such file or directory
E0824 23:02:02.720555 101 model_repository_manager.cc:1420] Invalid argument: ensemble conformer-en-US-asr-streaming contains models that are not available: riva-trt-conformer-en-US-asr-streaming-am-streaming
I0824 23:02:02.720745 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline:1
I0824 23:02:02.821009 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-offline-feature-extractor-streaming-offline:1
I0824 23:02:02.864406 101 ctc-decoder-library.cc:20] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0824 23:02:02.865731 107 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0824 23:02:02.865850 107 parameter_parser.cc:121] Default value will be used
W0824 23:02:02.865895 107 parameter_parser.cc:120] Parameter use_vad could not be set from parameters
W0824 23:02:02.865929 107 parameter_parser.cc:121] Default value will be used
W0824 23:02:02.866024 107 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0824 23:02:02.866072 107 parameter_parser.cc:121] Default value will be used
W0824 23:02:02.866107 107 parameter_parser.cc:120] Parameter max_num_slots could not be set from parameters
W0824 23:02:02.866155 107 parameter_parser.cc:121] Default value will be used
W0824 23:02:02.866194 107 parameter_parser.cc:120] Parameter use_vad could not be set from parameters
W0824 23:02:02.866237 107 parameter_parser.cc:121] Default value will be used
I0824 23:02:02.866689 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline",
"platform": "",
"backend": "riva_asr_decoder",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1024,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
257
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "END_FLAG",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CUSTOM_CONFIGURATION",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "FINAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_TRANSCRIPTS_SCORE",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS_STABILITY",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1024,
"preferred_batch_size": [
32,
64
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"ms_per_timestep": {
"string_value": "40"
},
"streaming": {
"string_value": "True"
},
"use_subword": {
"string_value": "True"
},
"beam_size": {
"string_value": "32"
},
"right_padding_size": {
"string_value": "1.6"
},
"beam_size_token": {
"string_value": "16"
},
"sil_token": {
"string_value": "▁"
},
"num_tokenization": {
"string_value": "1"
},
"beam_threshold": {
"string_value": "20.0"
},
"language_model_file": {
"string_value": "/data/models/conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline/1/en_4.0.bin"
},
"tokenizer_model": {
"string_value": "/data/models/conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline/1/8b8f095152034e98b24ab33726708bd0_tokenizer.model"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"forerunner_use_lm": {
"string_value": "true"
},
"forerunner_beam_size_token": {
"string_value": "8"
},
"forerunner_beam_threshold": {
"string_value": "10.0"
},
"asr_model_delay": {
"string_value": "-1"
},
"decoder_num_worker_threads": {
"string_value": "-1"
},
"word_insertion_score": {
"string_value": "1.0"
},
"unk_token": {
"string_value": "<unk>"
},
"left_padding_size": {
"string_value": "1.6"
},
"set_default_index_to_unk_token": {
"string_value": "False"
},
"decoder_type": {
"string_value": "flashlight"
},
"forerunner_beam_size": {
"string_value": "8"
},
"unk_score": {
"string_value": "-inf"
},
"max_supported_transcripts": {
"string_value": "1"
},
"chunk_size": {
"string_value": "4.8"
},
"lexicon_file": {
"string_value": "/data/models/conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline/1/lexicon.txt"
},
"smearing_mode": {
"string_value": "max"
},
"log_add": {
"string_value": "True"
},
"blank_token": {
"string_value": "#"
},
"lm_weight": {
"string_value": "0.8"
},
"vocab_file": {
"string_value": "/data/models/conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline/1/riva_decoder_vocabulary.txt"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:02.867876 101 ctc-decoder-library.cc:23] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline_0 (device 0)
I0824 23:02:02.921271 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline:1
I0824 23:02:03.021628 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming:1
I0824 23:02:03.121994 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-streaming-feature-extractor-streaming:1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:03.222297 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming:1
I0824 23:02:03.322647 101 model_repository_manager.cc:1077] loading: riva-punctuation-en-US:1
I0824 23:02:03.501586 101 model_repository_manager.cc:1077] loading: riva-trt-conformer-en-US-asr-offline-am-streaming-offline:1
I0824 23:02:03.602010 101 model_repository_manager.cc:1077] loading: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased:1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:04.540582 107 ctc-decoder.cc:171] Beam Decoder initialized successfully!
I0824 23:02:04.541307 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline' version 1
I0824 23:02:04.541934 101 feature-extractor.cc:399] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-offline-feature-extractor-streaming-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0824 23:02:04.542941 108 parameter_parser.cc:120] Parameter is_dither_seed_random could not be set from parameters
W0824 23:02:04.543058 108 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.543107 108 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0824 23:02:04.543138 108 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.543191 108 parameter_parser.cc:120] Parameter max_sequence_idle_microseconds could not be set from parameters
W0824 23:02:04.543226 108 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.543282 108 parameter_parser.cc:120] Parameter preemph_coeff could not be set from parameters
W0824 23:02:04.543315 108 parameter_parser.cc:121] Default value will be used
I0824 23:02:04.609855 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-offline-feature-extractor-streaming-offline",
"platform": "",
"backend": "riva_asr_features",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 512,
"input": [
{
"name": "AUDIO_SIGNAL",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SAMPLE_RATE",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "AUDIO_FEATURES",
"data_type": "TYPE_FP32",
"dims": [
80,
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "AUDIO_PROCESSED",
"data_type": "TYPE_FP32",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "AUDIO_FEATURES_LENGTH",
"data_type": "TYPE_INT32",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 512,
"preferred_batch_size": [
256,
512
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-offline-feature-extractor-streaming-offline_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"gain": {
"string_value": "1.0"
},
"precalc_norm_time_steps": {
"string_value": "0"
},
"use_utterance_norm_params": {
"string_value": "False"
},
"dither": {
"string_value": "1e-05"
},
"precalc_norm_params": {
"string_value": "False"
},
"norm_per_feature": {
"string_value": "True"
},
"mean": {
"string_value": "-11.4412, -9.9334, -9.1292, -9.0365, -9.2804, -9.5643, -9.7342, -9.6925, -9.6333, -9.2808, -9.1887, -9.1422, -9.1397, -9.2028, -9.2749, -9.4776, -9.9185, -10.1557, -10.3800, -10.5067, -10.3190, -10.4728, -10.5529, -10.6402, -10.6440, -10.5113, -10.7395, -10.7870, -10.6074, -10.5033, -10.8278, -10.6384, -10.8481, -10.6875, -10.5454, -10.4747, -10.5165, -10.4930, -10.3413, -10.3472, -10.3735, -10.6830, -10.8813, -10.6338, -10.3856, -10.7727, -10.8957, -10.8068, -10.7373, -10.6108, -10.3405, -10.2889, -10.3922, -10.4946, -10.3367, -10.4164, -10.9949, -10.7196, -10.3971, -10.1734, -9.9257, -9.6557, -9.1761, -9.6653, -9.7876, -9.7230, -9.7792, -9.7056, -9.2702, -9.4650, -9.2755, -9.1369, -9.1174, -8.9197, -8.5394, -8.2614, -8.1353, -8.1422, -8.3430, -8.6655"
},
"stddev": {
"string_value": "2.2668, 3.1642, 3.7079, 3.7642, 3.5349, 3.5901, 3.7640, 3.8424, 4.0145, 4.1475, 4.0457, 3.9048, 3.7709, 3.6117, 3.3188, 3.1489, 3.0615, 3.0362, 2.9929, 3.0500, 3.0341, 3.0484, 3.0103, 2.9474, 2.9128, 2.8669, 2.8332, 2.9411, 3.0378, 3.0712, 3.0190, 2.9992, 3.0124, 3.0024, 3.0275, 3.0870, 3.0656, 3.0142, 3.0493, 3.1373, 3.1135, 3.0675, 2.8828, 2.7018, 2.6296, 2.8826, 2.9325, 2.9288, 2.9271, 2.9890, 3.0137, 2.9855, 3.0839, 2.9319, 2.3512, 2.3795, 2.6191, 2.7555, 2.9326, 2.9931, 3.1543, 3.0855, 2.6820, 3.0566, 3.1272, 3.1663, 3.1836, 3.0018, 2.9089, 3.1727, 3.1626, 3.1086, 2.9804, 3.1107, 3.2998, 3.3697, 3.3716, 3.2487, 3.1597, 3.1181"
},
"chunk_size": {
"string_value": "4.8"
},
"max_execution_batch_size": {
"string_value": "512"
},
"sample_rate": {
"string_value": "16000"
},
"window_size": {
"string_value": "0.025"
},
"num_features": {
"string_value": "80"
},
"window_stride": {
"string_value": "0.01"
},
"streaming": {
"string_value": "True"
},
"left_padding_size": {
"string_value": "1.6"
},
"stddev_floor": {
"string_value": "1e-05"
},
"transpose": {
"string_value": "False"
},
"right_padding_size": {
"string_value": "1.6"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:04.611508 101 vad_library.cc:18] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0824 23:02:04.612211 109 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0824 23:02:04.612298 109 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.612373 109 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0824 23:02:04.612419 109 parameter_parser.cc:121] Default value will be used
I0824 23:02:04.612833 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline",
"platform": "",
"backend": "riva_asr_vad",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 2048,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
257
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_FP32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"vocab_file": {
"string_value": "/data/models/conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline/1/riva_decoder_vocabulary.txt"
},
"residue_blanks_at_start": {
"string_value": "0"
},
"ms_per_timestep": {
"string_value": "40"
},
"use_subword": {
"string_value": "True"
},
"streaming": {
"string_value": "True"
},
"residue_blanks_at_end": {
"string_value": "0"
},
"vad_stop_history": {
"string_value": "800"
},
"vad_start_history": {
"string_value": "200"
},
"chunk_size": {
"string_value": "4.8"
},
"vad_stop_th": {
"string_value": "0.98"
},
"vad_start_th": {
"string_value": "0.2"
},
"vad_type": {
"string_value": "ctc-vad"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:04.613409 101 ctc-decoder-library.cc:20] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming (version 1)
W0824 23:02:04.614167 110 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0824 23:02:04.614225 110 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.614292 110 parameter_parser.cc:120] Parameter use_vad could not be set from parameters
W0824 23:02:04.614327 110 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.614471 110 parameter_parser.cc:120] Parameter forerunner_start_offset_ms could not be set from parameters
W0824 23:02:04.614517 110 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.614552 110 parameter_parser.cc:120] Parameter max_num_slots could not be set from parameters
W0824 23:02:04.614603 110 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.614641 110 parameter_parser.cc:120] Parameter use_vad could not be set from parameters
W0824 23:02:04.614693 110 parameter_parser.cc:121] Default value will be used
I0824 23:02:04.615106 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming",
"platform": "",
"backend": "riva_asr_decoder",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1024,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
257
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "END_FLAG",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CUSTOM_CONFIGURATION",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
-1,
2
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "FINAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_TRANSCRIPTS_SCORE",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "FINAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS",
"data_type": "TYPE_STRING",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_TRANSCRIPTS_STABILITY",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "PARTIAL_WORDS_START_END",
"data_type": "TYPE_INT32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1024,
"preferred_batch_size": [
32,
64
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"right_padding_size": {
"string_value": "1.92"
},
"beam_size_token": {
"string_value": "16"
},
"sil_token": {
"string_value": "▁"
},
"num_tokenization": {
"string_value": "1"
},
"beam_threshold": {
"string_value": "20.0"
},
"tokenizer_model": {
"string_value": "/data/models/conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/8b8f095152034e98b24ab33726708bd0_tokenizer.model"
},
"language_model_file": {
"string_value": "/data/models/conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/en_4.0.bin"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"forerunner_use_lm": {
"string_value": "true"
},
"forerunner_beam_size_token": {
"string_value": "8"
},
"forerunner_beam_threshold": {
"string_value": "10.0"
},
"asr_model_delay": {
"string_value": "-1"
},
"decoder_num_worker_threads": {
"string_value": "-1"
},
"word_insertion_score": {
"string_value": "1.0"
},
"unk_token": {
"string_value": "<unk>"
},
"left_padding_size": {
"string_value": "1.92"
},
"set_default_index_to_unk_token": {
"string_value": "False"
},
"decoder_type": {
"string_value": "flashlight"
},
"forerunner_beam_size": {
"string_value": "8"
},
"unk_score": {
"string_value": "-inf"
},
"max_supported_transcripts": {
"string_value": "1"
},
"chunk_size": {
"string_value": "0.16"
},
"lexicon_file": {
"string_value": "/data/models/conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/lexicon.txt"
},
"smearing_mode": {
"string_value": "max"
},
"log_add": {
"string_value": "True"
},
"blank_token": {
"string_value": "#"
},
"lm_weight": {
"string_value": "0.8"
},
"vocab_file": {
"string_value": "/data/models/conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming/1/riva_decoder_vocabulary.txt"
},
"ms_per_timestep": {
"string_value": "40"
},
"streaming": {
"string_value": "True"
},
"use_subword": {
"string_value": "True"
},
"beam_size": {
"string_value": "32"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:04.616184 101 vad_library.cc:18] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming (version 1)
W0824 23:02:04.616688 115 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0824 23:02:04.616758 115 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.616823 115 parameter_parser.cc:120] Parameter max_execution_batch_size could not be set from parameters
W0824 23:02:04.616858 115 parameter_parser.cc:121] Default value will be used
I0824 23:02:04.617214 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming",
"platform": "",
"backend": "riva_asr_vad",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 2048,
"input": [
{
"name": "CLASS_LOGITS",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1,
257
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "SEGMENTS_START_END",
"data_type": "TYPE_FP32",
"dims": [
-1,
2
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"vad_stop_th": {
"string_value": "0.98"
},
"vad_start_th": {
"string_value": "0.2"
},
"vad_type": {
"string_value": "ctc-vad"
},
"vocab_file": {
"string_value": "/data/models/conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming/1/riva_decoder_vocabulary.txt"
},
"ms_per_timestep": {
"string_value": "40"
},
"residue_blanks_at_start": {
"string_value": "-2"
},
"use_subword": {
"string_value": "True"
},
"streaming": {
"string_value": "True"
},
"residue_blanks_at_end": {
"string_value": "0"
},
"vad_stop_history": {
"string_value": "800"
},
"vad_start_history": {
"string_value": "200"
},
"chunk_size": {
"string_value": "0.16"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:04.625366 101 feature-extractor.cc:399] TRITONBACKEND_ModelInitialize: conformer-en-US-asr-streaming-feature-extractor-streaming (version 1)
W0824 23:02:04.626096 114 parameter_parser.cc:120] Parameter is_dither_seed_random could not be set from parameters
W0824 23:02:04.626158 114 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.626210 114 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0824 23:02:04.626268 114 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.626320 114 parameter_parser.cc:120] Parameter max_sequence_idle_microseconds could not be set from parameters
W0824 23:02:04.626374 114 parameter_parser.cc:121] Default value will be used
W0824 23:02:04.626410 114 parameter_parser.cc:120] Parameter preemph_coeff could not be set from parameters
W0824 23:02:04.626468 114 parameter_parser.cc:121] Default value will be used
I0824 23:02:04.655122 101 backend_model.cc:255] model configuration:
{
"name": "conformer-en-US-asr-streaming-feature-extractor-streaming",
"platform": "",
"backend": "riva_asr_features",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 1024,
"input": [
{
"name": "AUDIO_SIGNAL",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "SAMPLE_RATE",
"data_type": "TYPE_UINT32",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "AUDIO_FEATURES",
"data_type": "TYPE_FP32",
"dims": [
80,
-1
],
"label_filename": "",
"is_shape_tensor": false
},
{
"name": "AUDIO_PROCESSED",
"data_type": "TYPE_FP32",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"graph": {
"level": 0
},
"priority": "PRIORITY_DEFAULT",
"cuda": {
"graphs": false,
"busy_wait_events": false,
"graph_spec": [],
"output_copy_stream": true
},
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"sequence_batching": {
"oldest": {
"max_candidate_sequences": 1024,
"preferred_batch_size": [
256,
512
],
"max_queue_delay_microseconds": 1000
},
"max_sequence_idle_microseconds": 60000000,
"control_input": [
{
"name": "START",
"control": [
{
"kind": "CONTROL_SEQUENCE_START",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "READY",
"control": [
{
"kind": "CONTROL_SEQUENCE_READY",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "END",
"control": [
{
"kind": "CONTROL_SEQUENCE_END",
"int32_false_true": [
0,
1
],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_INVALID"
}
]
},
{
"name": "CORRID",
"control": [
{
"kind": "CONTROL_SEQUENCE_CORRID",
"int32_false_true": [],
"fp32_false_true": [],
"bool_false_true": [],
"data_type": "TYPE_UINT64"
}
]
}
],
"state": []
},
"instance_group": [
{
"name": "conformer-en-US-asr-streaming-feature-extractor-streaming_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"window_stride": {
"string_value": "0.01"
},
"window_size": {
"string_value": "0.025"
},
"num_features": {
"string_value": "80"
},
"streaming": {
"string_value": "True"
},
"left_padding_size": {
"string_value": "1.92"
},
"stddev_floor": {
"string_value": "1e-05"
},
"transpose": {
"string_value": "False"
},
"right_padding_size": {
"string_value": "1.92"
},
"gain": {
"string_value": "1.0"
},
"use_utterance_norm_params": {
"string_value": "False"
},
"precalc_norm_time_steps": {
"string_value": "0"
},
"precalc_norm_params": {
"string_value": "False"
},
"dither": {
"string_value": "1e-05"
},
"norm_per_feature": {
"string_value": "True"
},
"mean": {
"string_value": "-11.4412, -9.9334, -9.1292, -9.0365, -9.2804, -9.5643, -9.7342, -9.6925, -9.6333, -9.2808, -9.1887, -9.1422, -9.1397, -9.2028, -9.2749, -9.4776, -9.9185, -10.1557, -10.3800, -10.5067, -10.3190, -10.4728, -10.5529, -10.6402, -10.6440, -10.5113, -10.7395, -10.7870, -10.6074, -10.5033, -10.8278, -10.6384, -10.8481, -10.6875, -10.5454, -10.4747, -10.5165, -10.4930, -10.3413, -10.3472, -10.3735, -10.6830, -10.8813, -10.6338, -10.3856, -10.7727, -10.8957, -10.8068, -10.7373, -10.6108, -10.3405, -10.2889, -10.3922, -10.4946, -10.3367, -10.4164, -10.9949, -10.7196, -10.3971, -10.1734, -9.9257, -9.6557, -9.1761, -9.6653, -9.7876, -9.7230, -9.7792, -9.7056, -9.2702, -9.4650, -9.2755, -9.1369, -9.1174, -8.9197, -8.5394, -8.2614, -8.1353, -8.1422, -8.3430, -8.6655"
},
"stddev": {
"string_value": "2.2668, 3.1642, 3.7079, 3.7642, 3.5349, 3.5901, 3.7640, 3.8424, 4.0145, 4.1475, 4.0457, 3.9048, 3.7709, 3.6117, 3.3188, 3.1489, 3.0615, 3.0362, 2.9929, 3.0500, 3.0341, 3.0484, 3.0103, 2.9474, 2.9128, 2.8669, 2.8332, 2.9411, 3.0378, 3.0712, 3.0190, 2.9992, 3.0124, 3.0024, 3.0275, 3.0870, 3.0656, 3.0142, 3.0493, 3.1373, 3.1135, 3.0675, 2.8828, 2.7018, 2.6296, 2.8826, 2.9325, 2.9288, 2.9271, 2.9890, 3.0137, 2.9855, 3.0839, 2.9319, 2.3512, 2.3795, 2.6191, 2.7555, 2.9326, 2.9931, 3.1543, 3.0855, 2.6820, 3.0566, 3.1272, 3.1663, 3.1836, 3.0018, 2.9089, 3.1727, 3.1626, 3.1086, 2.9804, 3.1107, 3.2998, 3.3697, 3.3716, 3.2487, 3.1597, 3.1181"
},
"chunk_size": {
"string_value": "0.16"
},
"max_execution_batch_size": {
"string_value": "1024"
},
"sample_rate": {
"string_value": "16000"
}
},
"model_warmup": [],
"model_transaction_policy": {
"decoupled": false
}
}
I0824 23:02:04.655913 101 feature-extractor.cc:401] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-offline-feature-extractor-streaming-offline_0 (device 0)
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:09.718234 101 ctc-decoder-library.cc:23] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming_0 (device 0)
I0824 23:02:09.718793 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-offline-feature-extractor-streaming-offline' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:11.700796 110 ctc-decoder.cc:171] Beam Decoder initialized successfully!
I0824 23:02:11.701091 101 vad_library.cc:21] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming_0 (device 0)
I0824 23:02:11.707552 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming' version 1
I0824 23:02:11.735048 101 vad_library.cc:21] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline_0 (device 0)
I0824 23:02:11.744431 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming' version 1
I0824 23:02:11.776780 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline' version 1
I0824 23:02:11.804831 101 pipeline_library.cc:19] TRITONBACKEND_ModelInitialize: riva-punctuation-en-US (version 1)
WARNING: Logging before InitGoogleLogging() is written to STDERR
W0824 23:02:11.805441 116 parameter_parser.cc:120] Parameter bos could not be set from parameters
W0824 23:02:11.805532 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.805596 116 parameter_parser.cc:120] Parameter dropout_prob could not be set from parameters
W0824 23:02:11.805655 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.805723 116 parameter_parser.cc:120] Parameter eos could not be set from parameters
W0824 23:02:11.805778 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.805819 116 parameter_parser.cc:120] Parameter reverse could not be set from parameters
W0824 23:02:11.805871 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.805956 116 parameter_parser.cc:120] Parameter bos could not be set from parameters
W0824 23:02:11.805999 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806033 116 parameter_parser.cc:120] Parameter doc_stride could not be set from parameters
W0824 23:02:11.806087 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806121 116 parameter_parser.cc:120] Parameter dropout_prob could not be set from parameters
W0824 23:02:11.806174 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806207 116 parameter_parser.cc:120] Parameter eos could not be set from parameters
W0824 23:02:11.806260 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806295 116 parameter_parser.cc:120] Parameter margin could not be set from parameters
W0824 23:02:11.806349 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806382 116 parameter_parser.cc:120] Parameter max_batch_size could not be set from parameters
W0824 23:02:11.806435 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806469 116 parameter_parser.cc:120] Parameter max_query_length could not be set from parameters
W0824 23:02:11.806522 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806555 116 parameter_parser.cc:120] Parameter max_seq_length could not be set from parameters
W0824 23:02:11.806608 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806644 116 parameter_parser.cc:120] Parameter reverse could not be set from parameters
W0824 23:02:11.806695 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806736 116 parameter_parser.cc:120] Parameter step could not be set from parameters
W0824 23:02:11.806787 116 parameter_parser.cc:121] Default value will be used
W0824 23:02:11.806828 116 parameter_parser.cc:120] Parameter task could not be set from parameters
W0824 23:02:11.806879 116 parameter_parser.cc:121] Default value will be used
I0824 23:02:11.806977 101 backend_model.cc:255] model configuration:
{
"name": "riva-punctuation-en-US",
"platform": "",
"backend": "riva_nlp_pipeline",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 8,
"input": [
{
"name": "PIPELINE_INPUT",
"data_type": "TYPE_STRING",
"format": "FORMAT_NONE",
"dims": [
1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "PIPELINE_OUTPUT",
"data_type": "TYPE_STRING",
"dims": [
1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "riva-punctuation-en-US_0",
"kind": "KIND_CPU",
"count": 1,
"gpus": [],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"to_lower": {
"string_value": "true"
},
"pipeline_type": {
"string_value": "punctuation"
},
"capitalization_mapping_path": {
"string_value": "/data/models/riva-punctuation-en-US/1/capit_label_ids.csv"
},
"eos_token": {
"string_value": "[SEP]"
},
"load_model": {
"string_value": "false"
},
"attn_mask_tensor_name": {
"string_value": "input_mask"
},
"token_type_tensor_name": {
"string_value": "segment_ids"
},
"punct_logits_tensor_name": {
"string_value": "punct_token_logits"
},
"language_code": {
"string_value": "en-US"
},
"tokenizer": {
"string_value": "wordpiece"
},
"delimiter": {
"string_value": " "
},
"input_ids_tensor_name": {
"string_value": "input_ids"
},
"model_name": {
"string_value": "riva-trt-riva-punctuation-en-US-nn-bert-base-uncased"
},
"pad_chars_with_spaces": {
"string_value": "False"
},
"remove_spaces": {
"string_value": "False"
},
"tokenizer_to_lower": {
"string_value": "true"
},
"model_family": {
"string_value": "riva"
},
"unk_token": {
"string_value": "[UNK]"
},
"vocab": {
"string_value": "/data/models/riva-punctuation-en-US/1/tokenizer.vocab_file"
},
"bos_token": {
"string_value": "[CLS]"
},
"capit_logits_tensor_name": {
"string_value": "capit_token_logits"
},
"punctuation_mapping_path": {
"string_value": "/data/models/riva-punctuation-en-US/1/punct_label_ids.csv"
},
"model_api": {
"string_value": "/nvidia.riva.nlp.RivaLanguageUnderstanding/PunctuateText"
}
},
"model_warmup": []
}
I0824 23:02:11.807636 101 pipeline_library.cc:22] TRITONBACKEND_ModelInstanceInitialize: riva-punctuation-en-US_0 (device 0)
I0824 23:02:11.818480 101 feature-extractor.cc:401] TRITONBACKEND_ModelInstanceInitialize: conformer-en-US-asr-streaming-feature-extractor-streaming_0 (device 0)
I0824 23:02:11.823758 101 model_repository_manager.cc:1231] successfully loaded 'riva-punctuation-en-US' version 1
I0824 23:02:12.121284 101 tensorrt.cc:5294] TRITONBACKEND_Initialize: tensorrt
I0824 23:02:12.121475 101 tensorrt.cc:5304] Triton TRITONBACKEND API version: 1.9
I0824 23:02:12.121548 101 tensorrt.cc:5310] 'tensorrt' TRITONBACKEND API version: 1.9
I0824 23:02:12.121809 101 tensorrt.cc:5353] backend configuration:
{}
I0824 23:02:12.121942 101 tensorrt.cc:5405] TRITONBACKEND_ModelInitialize: riva-trt-conformer-en-US-asr-offline-am-streaming-offline (version 1)
I0824 23:02:12.122602 101 tensorrt.cc:5454] TRITONBACKEND_ModelInstanceInitialize: riva-trt-conformer-en-US-asr-offline-am-streaming-offline_0 (GPU device 0)
I0824 23:02:12.123533 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-streaming-feature-extractor-streaming' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:13.982613 101 logging.cc:49] [MemUsageChange] Init CUDA: CPU +417, GPU +0, now: CPU 2003, GPU 5996 (MiB)
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:14.675465 101 logging.cc:49] Loaded engine size: 416 MiB
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:15.997027 101 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2951, GPU 6342 (MiB)
I0824 23:02:16.277753 101 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +127, GPU +58, now: CPU 3078, GPU 6400 (MiB)
I0824 23:02:16.282010 101 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +330, now: CPU 0, GPU 330 (MiB)
I0824 23:02:16.322536 101 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 2245, GPU 6392 (MiB)
I0824 23:02:16.323401 101 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 2245, GPU 6400 (MiB)
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:19.250465 101 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +365, now: CPU 0, GPU 695 (MiB)
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:22.130511 101 tensorrt.cc:1411] Created instance riva-trt-conformer-en-US-asr-offline-am-streaming-offline_0 on GPU 0 with stream priority 0 and optimization profile default[0];
I0824 23:02:22.130794 101 tensorrt.cc:5405] TRITONBACKEND_ModelInitialize: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased (version 1)
I0824 23:02:22.131522 101 tensorrt.cc:5454] TRITONBACKEND_ModelInstanceInitialize: riva-trt-riva-punctuation-en-US-nn-bert-base-uncased_0 (GPU device 0)
I0824 23:02:22.132337 101 logging.cc:49] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 3232, GPU 7669 (MiB)
I0824 23:02:22.141030 101 model_repository_manager.cc:1231] successfully loaded 'riva-trt-conformer-en-US-asr-offline-am-streaming-offline' version 1
I0824 23:02:22.553322 101 logging.cc:49] Loaded engine size: 208 MiB
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:22.745298 101 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 3763, GPU 8011 (MiB)
I0824 23:02:22.746580 101 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +10, now: CPU 3763, GPU 8021 (MiB)
I0824 23:02:22.750318 101 logging.cc:49] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +95, now: CPU 0, GPU 790 (MiB)
I0824 23:02:22.776407 101 logging.cc:49] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 3347, GPU 8013 (MiB)
I0824 23:02:22.777337 101 logging.cc:49] [MemUsageChange] Init cuDNN: CPU +0, GPU +8, now: CPU 3347, GPU 8021 (MiB)
E0824 23:02:22.849849 101 logging.cc:43] /home/jenkins/agent/workspace/OSS/OSS_L0_MergeRequest/oss/plugin/common/bertCommon.h (395) - Cuda Error in copyToDevice: 2 (out of memory)
E0824 23:02:22.853534 101 logging.cc:43] std::exception
E0824 23:02:22.855729 101 logging.cc:43] /home/jenkins/agent/workspace/OSS/OSS_L0_MergeRequest/oss/plugin/common/bertCommon.h (395) - Cuda Error in copyToDevice: 2 (out of memory)
E0824 23:02:22.855860 101 logging.cc:43] std::exception
E0824 23:02:22.857959 101 logging.cc:43] /home/jenkins/agent/workspace/OSS/OSS_L0_MergeRequest/oss/plugin/common/bertCommon.h (395) - Cuda Error in copyToDevice: 2 (out of memory)
E0824 23:02:22.858085 101 logging.cc:43] std::exception
E0824 23:02:22.858954 101 logging.cc:43] /home/jenkins/agent/workspace/OSS/OSS_L0_MergeRequest/oss/plugin/common/bertCommon.h (395) - Cuda Error in copyToDevice: 2 (out of memory)
E0824 23:02:22.859080 101 logging.cc:43] std::exception
E0824 23:02:22.860069 101 logging.cc:43] /home/jenkins/agent/workspace/OSS/OSS_L0_MergeRequest/oss/plugin/common/bertCommon.h (395) - Cuda Error in copyToDevice: 2 (out of memory)
E0824 23:02:22.860191 101 logging.cc:43] std::exception
E0824 23:02:22.865334 101 logging.cc:43] 1: [defaultAllocator.cpp::allocate::18] Error Code 1: Cuda Runtime (out of memory)
W0824 23:02:22.865911 101 logging.cc:46] -------------- The current system memory allocations dump as below --------------
[0x7f6b958e70d0]:66463 :GPU per-runner memory in ExecutionContext: at runtime/api/executionContext.cpp: 160 idx: 149 time: 6.14e-07
[0x7f6b95b689d0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 148 time: 3.33e-07
[0x7f6b95b688b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 147 time: 5.8e-08
[0x7f6b95b65c20]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 146 time: 1.01e-07
[0x7f6b95b65b00]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 145 time: 8.7e-08
[0x7f6b956d11a0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 143 time: 9.1e-08
[0x7f6b956c5100]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 139 time: 6.5e-08
[0x7f6b956ad4f0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 137 time: 6.4e-08
[0x7f6b956a99f0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 135 time: 1.02e-07
[0x7f6b956920d0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 134 time: 1.17e-07
[0x7f6b95691fb0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 133 time: 1.05e-07
[0x7f6b956901c0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 132 time: 2.25e-07
[0x7f6b9568e410]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 131 time: 1.15e-07
[0x7f6b95676950]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 130 time: 9e-08
[0x7f6b95676830]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 129 time: 9.8e-08
[0x7f6b95672590]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 127 time: 6.8e-08
[0x7f68c6c68d00]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 55 time: 1.92e-07
[0x7f68c5182890]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 31 time: 1.08e-07
[0x7f68c6ab5320]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 53 time: 2.61e-07
[0x7f68c66183a0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 50 time: 1.56e-07
[0x7f69073c0130]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 9 time: 2.44e-07
[0x7f68c646a9f0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 48 time: 2.85e-07
[0x7f68c5fcd950]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 44 time: 2.62e-07
[0x7f68c5e202c0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 43 time: 2.87e-07
[0x7f68c4ceea60]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 29 time: 2.64e-07
[0x7f6b95608630]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 114 time: 9.3e-08
[0x7f68c57f4cc0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 37 time: 1.91e-07
[0x7f68c7db9aa0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 67 time: 1.31e-07
[0x7f68c5339410]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 35 time: 2.3e-07
[0x7f6b95674340]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 128 time: 2.17e-07
[0x7f6907853c90]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 11 time: 6.6e-08
[0x7f690722cdd0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 7 time: 1.56e-07
[0x7f68c5983340]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 40 time: 2.62e-07
[0x7f68c6ad3c50]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 52 time: 5.7e-08
[0x7f68c6489320]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 47 time: 9.1e-08
[0x7f68c44e9b20]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 21 time: 8e-08
[0x7f6907ea8940]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 18 time: 2.63e-07
[0x7f6b955d19c0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 105 time: 1.23e-07
[0x7f68c7f47c90]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 70 time: 2.62e-07
[0x7f68c51aa590]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 32 time: 7e-08
[0x7f6b956d12c0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 144 time: 1.24e-07
[0x7f68bca31ec0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 76 time: 1.22e-07
[0x7f6b95604a80]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 111 time: 9.1e-08
[0x7f68c6c68be0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 54 time: 1.27e-07
[0x7f69070b80d0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 5 time: 3.22e-07
[0x7f69070921a0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 4 time: 1.01e-07
[0x7f6b956ad610]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 138 time: 8.2e-08
[0x7f6906c48be0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 3 time: 1.65e-07
[0x7f68c4b5c3c0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 27 time: 6.3e-08
[0x7f68c5e3ebf0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 42 time: 5e-08
[0x7f690787b5a0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 12 time: 8.6e-08
[0x7f6907e9f780]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 16 time: 6.2e-08
[0x7f68c70fcac0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 56 time: 1.09e-07
[0x7f68c44f2ce0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 23 time: 4.81e-07
[0x7f68bc59df90]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 74 time: 1.61e-07
[0x7f68c53392f0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 34 time: 1.37e-07
[0x7f6906c4efe0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 1 time: 1.85e-07
[0x7f6906c48a80]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 2 time: 6.6e-08
[0x7f68c46a09f0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 24 time: 1.34e-07
[0x7f69073c0250]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 10 time: 2.34e-07
[0x7f68c6aac160]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 51 time: 5.9e-08
[0x7f68c5fcda70]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 45 time: 2.19e-07
[0x7f6b955ecfd0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 109 time: 9.8e-08
[0x7f6907094d30]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 6 time: 9.1e-08
[0x7f68c4ceeb80]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 30 time: 3.32e-07
[0x7f69065964b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 0 time: 1.11e-07
[0x7f68c4b348d0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 26 time: 7.8e-08
[0x7f690785ccc0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 13 time: 2.77e-07
[0x7f68bde12ee0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 95 time: 2.57e-07
[0x7f6b956ab7a0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 136 time: 3.35e-07
[0x7f68c5983220]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 39 time: 1.51e-07
[0x7f68bd07c050]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 81 time: 6.5e-08
[0x7f6907ec7270]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 17 time: 7.9e-08
[0x7f68c6461830]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 46 time: 1.27e-07
[0x7f69072050e0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 8 time: 4.04e-07
[0x7f6907a0a0b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 14 time: 1.21e-07
[0x7f68c4055c40]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 19 time: 1.6e-07
[0x7f6b95656fe0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 123 time: 7.2e-08
[0x7f68c6618280]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 49 time: 2.62e-07
[0x7f68c57d6390]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 38 time: 3.05e-07
[0x7f68c7d9b170]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 68 time: 3.77e-07
[0x7f68c7f47b70]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 69 time: 2.64e-07
[0x7f6b955b49a0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 100 time: 3.11e-07
[0x7f68c4055d60]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 20 time: 5.4e-07
[0x7f68c4511610]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 22 time: 1.02e-07
[0x7f68c72b2e60]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 60 time: 2.01e-07
[0x7f68c71245b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 57 time: 5.2e-08
[0x7f68c57cd1d0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 36 time: 8.2e-08
[0x7f68c5e17100]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 41 time: 8.2e-08
[0x7f68c46a0b10]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 25 time: 1.91e-07
[0x7f68c4b3da90]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 28 time: 2.9e-07
[0x7f68bc3e7770]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 71 time: 8.3e-08
[0x7f6b956c6eb0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 140 time: 2.95e-07
[0x7f6b955cdf30]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 103 time: 1.01e-07
[0x7f6b955e9540]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 107 time: 8.1e-08
[0x7f68c72b2d40]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 59 time: 2.93e-07
[0x7f68bdd1a2f0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 93 time: 2.62e-07
[0x7f68c7746c20]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 61 time: 2.55e-07
[0x7f68c78fe200]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 65 time: 2.25e-07
[0x7f68c776e920]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 62 time: 4.6e-08
[0x7f68c774fff0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 63 time: 2.84e-07
[0x7f6b953399c0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 98 time: 4.51e-07
[0x7f6907a0a1d0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 15 time: 2.16e-07
[0x7f68c78fe0e0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 64 time: 2.62e-07
[0x7f68bc40f260]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 72 time: 8.2e-08
[0x7f68c7105c80]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 58 time: 2.8e-07
[0x7f68bc59e0b0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 75 time: 1.21e-07
[0x7f68bca59bc0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 77 time: 7.8e-08
[0x7f68bca3b290]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 78 time: 2.85e-07
[0x7f6b956c8d90]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 142 time: 1.05e-07
[0x7f68bd2329a0]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 85 time: 2.48e-07
[0x7f6b955b6950]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 102 time: 1.77e-07
[0x7f68bcbe8120]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 79 time: 1.49e-07
[0x7f68bd6cf970]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 88 time: 2.6e-07
[0x7f68bcbe8240]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 80 time: 1.81e-07
[0x7f6b9563f5d0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 121 time: 7.5e-08
[0x7f68bd0a3d50]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 82 time: 4.4e-08
[0x7f68bd87d320]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 90 time: 2.08e-07
[0x7f68bd085420]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 83 time: 2.28e-07
[0x7f6b9563ba00]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 119 time: 8.5e-08
[0x7f6b956c8c70]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 141 time: 9.7e-08
[0x7f68bd232880]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 84 time: 2.64e-07
[0x7f6b955b6830]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 101 time: 2.57e-07
[0x7f6b9563f6f0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 122 time: 8.4e-08
[0x7f68c518bc60]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 33 time: 2.96e-07
[0x7f68bd6c67b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 86 time: 6.6e-08
[0x7f68bd6ee2a0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 87 time: 4.5e-08
[0x7f68bd87d200]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 89 time: 1.45e-07
[0x7f68bdd11130]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 91 time: 7.7e-08
[0x7f6b95606840]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 112 time: 1.98e-07
[0x7f6b956219f0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 116 time: 3.61e-07
[0x7f6b956239b0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 117 time: 8.3e-08
[0x7f68bc3f0930]:2527 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 73 time: 3.13e-07
[0x7f68bdd38c20]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 92 time: 8.3e-08
[0x7f6b95608510]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 113 time: 8.3e-08
[0x7f68bddccac0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 94 time: 8.5e-08
[0x7f68b0c8b600]:224383 :GPU per-runner memory in ExecutionContext: at runtime/api/executionContext.cpp: 160 idx: 96 time: 4.18e-07
[0x7f6b95339920]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 97 time: 2.8e-07
[0x7f6b955ce050]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 104 time: 2.16e-07
[0x7f6b955d1ae0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 106 time: 1.46e-07
[0x7f6b955b2d70]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 99 time: 1.35e-07
[0x7f6b955eb0f0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 108 time: 3.39e-07
[0x7f6b955ed0f0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 110 time: 1.09e-07
[0x7f6b9561fe80]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 115 time: 9.7e-08
[0x7f6b95623ad0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 118 time: 1.05e-07
[0x7f68c7d91fb0]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 66 time: 1.97e-07
[0x7f6b9563cfa0]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 120 time: 3.47e-07
[0x7f6b95658d90]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 124 time: 2.57e-07
[0x7f6b9565aa10]:4 : in internalAllocate: at runtime/common/weightsPtr.cpp: 100 idx: 125 time: 6.5e-08
[0x7f6b9565ab30]:2303 :Safe deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 348 idx: 126 time: 8e-08
-------------- The current device memory allocations dump as below --------------
[0x101ca000000]:99257344 :GpuGlob deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 361 idx: 3 time: 0.000567058
[0]:99257344 :GPU per-runner memory in ExecutionContext: at runtime/api/executionContext.cpp: 163 idx: 4 time: 0.00508422
[0x10176000000]:349485056 :GPU context memory in ExecutionContext: at runtime/api/executionContext.cpp: 214 idx: 2 time: 0.00049535
[0x10174000000]:33548288 :GPU per-runner memory in ExecutionContext: at runtime/api/executionContext.cpp: 163 idx: 1 time: 0.000300524
[0x1006ce00000]:346586128 :GpuGlob deserialization in load: at runtime/deserialization/safeDeserialize.cpp: 361 idx: 0 time: 0.00076128
W0824 23:02:22.866533 101 logging.cc:46] Requested amount of GPU memory (99257344 bytes) could not be allocated. There may not be enough free memory for allocation to succeed.
E0824 23:02:22.876246 101 logging.cc:43] 2: [executionContext.cpp::ExecutionContext::163] Error Code 2: OutOfMemory (no further information)
I0824 23:02:22.876372 101 tensorrt.cc:5492] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:22.876502 101 tensorrt.cc:5431] TRITONBACKEND_ModelFinalize: delete model state
E0824 23:02:22.876614 101 model_repository_manager.cc:1234] failed to load 'riva-trt-riva-punctuation-en-US-nn-bert-base-uncased' version 1: Internal: unable to create TensorRT context
I0824 23:02:22.877260 101 model_repository_manager.cc:1077] loading: conformer-en-US-asr-offline:1
I0824 23:02:22.977592 101 model_repository_manager.cc:1231] successfully loaded 'conformer-en-US-asr-offline' version 1
I0824 23:02:22.977761 101 server.cc:549]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0824 23:02:22.977914 101 server.cc:576]
+-------------------+-----------------------------------------------------------------------------+--------+
| Backend | Path | Config |
+-------------------+-----------------------------------------------------------------------------+--------+
| onnxruntime | /opt/tritonserver/backends/onnxruntime/libtriton_onnxruntime.so | {} |
| riva_asr_decoder | /opt/tritonserver/backends/riva_asr_decoder/libtriton_riva_asr_decoder.so | {} |
| tensorrt | /opt/tritonserver/backends/tensorrt/libtriton_tensorrt.so | {} |
| riva_asr_vad | /opt/tritonserver/backends/riva_asr_vad/libtriton_riva_asr_vad.so | {} |
| riva_asr_features | /opt/tritonserver/backends/riva_asr_features/libtriton_riva_asr_features.so | {} |
| riva_nlp_pipeline | /opt/tritonserver/backends/riva_nlp_pipeline/libtriton_riva_nlp_pipeline.so | {} |
+-------------------+-----------------------------------------------------------------------------+--------+
I0824 23:02:22.978121 101 server.cc:619]
+---------------------------------------------------------------------------+---------+----------------------------------------------------------+
| Model | Version | Status |
+---------------------------------------------------------------------------+---------+----------------------------------------------------------+
| conformer-en-US-asr-offline | 1 | READY |
| conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline | 1 | READY |
| conformer-en-US-asr-offline-feature-extractor-streaming-offline | 1 | READY |
| conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline | 1 | READY |
| conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming | 1 | READY |
| conformer-en-US-asr-streaming-feature-extractor-streaming | 1 | READY |
| conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming | 1 | READY |
| riva-punctuation-en-US | 1 | READY |
| riva-trt-conformer-en-US-asr-offline-am-streaming-offline | 1 | READY |
| riva-trt-riva-punctuation-en-US-nn-bert-base-uncased | 1 | UNAVAILABLE: Internal: unable to create TensorRT context |
+---------------------------------------------------------------------------+---------+----------------------------------------------------------+
I0824 23:02:23.000316 101 metrics.cc:650] Collecting metrics for GPU 0: GRID A100D-8C
I0824 23:02:23.000630 101 tritonserver.cc:2123]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.21.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /data/models |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 1000000000 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0824 23:02:23.000821 101 server.cc:250] Waiting for in-flight requests to complete.
I0824 23:02:23.000896 101 server.cc:266] Timeout 30: Found 0 model versions that have in-flight inferences
I0824 23:02:23.000980 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-streaming-feature-extractor-streaming:1
I0824 23:02:23.001105 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming:1
I0824 23:02:23.001222 101 model_repository_manager.cc:1109] unloading: riva-trt-conformer-en-US-asr-offline-am-streaming-offline:1
I0824 23:02:23.001333 101 model_repository_manager.cc:1109] unloading: riva-punctuation-en-US:1
I0824 23:02:23.001446 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline:1
I0824 23:02:23.001554 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-offline-feature-extractor-streaming-offline:1
I0824 23:02:23.001662 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming:1
I0824 23:02:23.001782 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline:1
I0824 23:02:23.001894 101 model_repository_manager.cc:1109] unloading: conformer-en-US-asr-offline:1
I0824 23:02:23.002001 101 server.cc:281] All models are stopped, unloading models
I0824 23:02:23.002072 101 server.cc:288] Timeout 30: Found 9 live models and 0 in-flight non-inference requests
I0824 23:02:23.002282 101 vad_library.cc:24] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.002606 101 ctc-decoder-library.cc:25] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.003026 101 feature-extractor.cc:403] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.002490 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-offline' version 1
I0824 23:02:23.007804 101 vad_library.cc:24] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.013304 101 pipeline_library.cc:24] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.014370 101 tensorrt.cc:5492] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.015773 101 ctc-decoder-library.cc:25] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.022082 101 pipeline_library.cc:21] TRITONBACKEND_ModelFinalize: delete model stateI0824 23:02:23.022221 101 feature-extractor.cc:403] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0824 23:02:23.034687 101 vad_library.cc:20] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:23.034938 101 vad_library.cc:20] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:23.040116 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-streaming-voice-activity-detector-ctc-streaming' version 1
I0824 23:02:23.049024 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-offline-voice-activity-detector-ctc-streaming-offline' version 1
I0824 23:02:23.049226 101 model_repository_manager.cc:1214] successfully unloaded 'riva-punctuation-en-US' version 1
I0824 23:02:23.055651 101 feature-extractor.cc:400] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:23.056024 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-streaming-feature-extractor-streaming' version 1
I0824 23:02:23.086297 101 feature-extractor.cc:400] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:23.087447 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-offline-feature-extractor-streaming-offline' version 1
I0824 23:02:23.466094 101 tensorrt.cc:5431] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:23.472491 101 model_repository_manager.cc:1214] successfully unloaded 'riva-trt-conformer-en-US-asr-offline-am-streaming-offline' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:24.002232 101 server.cc:288] Timeout 29: Found 2 live models and 0 in-flight non-inference requests
W0824 23:02:24.326208 101 metrics.cc:426] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0824 23:02:24.326418 101 metrics.cc:444] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0824 23:02:24.326499 101 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
I0824 23:02:24.326655 101 ctc-decoder-library.cc:22] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:24.386914 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-offline-ctc-decoder-cpu-streaming-offline' version 1
I0824 23:02:24.455230 101 ctc-decoder-library.cc:22] TRITONBACKEND_ModelFinalize: delete model state
I0824 23:02:24.513075 101 model_repository_manager.cc:1214] successfully unloaded 'conformer-en-US-asr-streaming-ctc-decoder-cpu-streaming' version 1
> Riva waiting for Triton server to load all models...retrying in 1 second
I0824 23:02:25.002513 101 server.cc:288] Timeout 28: Found 0 live models and 0 in-flight non-inference requests
error: creating server: Internal - failed to load all models
W0824 23:02:25.326667 101 metrics.cc:426] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0824 23:02:25.326882 101 metrics.cc:444] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0824 23:02:25.326957 101 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
> Riva waiting for Triton server to load all models...retrying in 1 second
W0824 23:02:26.328402 101 metrics.cc:426] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0824 23:02:26.328612 101 metrics.cc:444] Unable to get power usage for GPU 0. Status:Success, value:0.000000
W0824 23:02:26.328686 101 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
> Riva waiting for Triton server to load all models...retrying in 1 second
> Triton server died before reaching ready state. Terminating Riva startup.
Thank you for your time and guidance.