Hi,
I've successfully implemented the custom_parse_classifier_func for OCRNet and validated that it correctly returns its results to the pipeline. My pipeline is streammux > nvdspreprocess > pgie (triton-server), with a probe function on the PGIE. I use Gst-nvdspreprocess to send only the text region to OCR. Everything works perfectly up to the PGIE, but I can't retrieve the data in the probe because frame_meta.obj_meta_list is None. I seem to be missing something in the implementation.
Probe code snippet:
gst_buffer = info.get_buffer()
if not gst_buffer:
    print("Unable to get GstBuffer")
    return Gst.PadProbeReturn.OK
batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
l_frame = batch_meta.frame_meta_list
while l_frame is not None:
    try:
        frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
    except StopIteration:
        break
    frame_number = frame_meta.frame_num
    l_obj = frame_meta.obj_meta_list  ## THIS IS NONE
    try:
        l_frame = l_frame.next
    except StopIteration:
        break
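As a fallback I am also trying to read the output at frame level. This is only a sketch of what I expect to need, assuming that with process-on-frame=1 and output_tensor_meta: true the raw output is attached to frame_meta.frame_user_meta_list as NVDSINFER_TENSOR_OUTPUT_META (inside the same while loop over frames):

# Sketch: walk frame-level user meta looking for the raw tensor output
# (assumes output_tensor_meta attaches NvDsInferTensorMeta to the frame).
l_user = frame_meta.frame_user_meta_list
while l_user is not None:
    try:
        user_meta = pyds.NvDsUserMeta.cast(l_user.data)
    except StopIteration:
        break
    if user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
        tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
        print("Tensor meta found with", tensor_meta.num_output_layers, "output layers")
    try:
        l_user = l_user.next
    except StopIteration:
        break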
Triton server config (config.pbtxt):
name: "nvidia-ocrnet"
platform: "tensorrt_plan"
max_batch_size: 32
input [
{
name: "input"
data_type: TYPE_FP32
format: FORMAT_NCHW
dims: [1 , 32, 100]
}
]
output [
{
name: "output_id"
data_type: TYPE_INT32
dims: [ 26 ]
},
{
name: "output_prob"
data_type: TYPE_FP32
dims: [ 26 ]
},
{
name: "798"
data_type: TYPE_INT32
dims: [ 26 ]
}
]
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [ 0 ]
}
]
version_policy: { latest: { num_versions: 1}}
dynamic_batching {
max_queue_delay_microseconds: 0
}
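As a sanity check on the output layer names (output_id, output_prob, 798), the deployed model's metadata can be queried over gRPC; a minimal sketch using the tritonclient package, with the model name and URL taken from the configs here:

# Sketch: confirm the deployed model's output names, types, and shapes.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="127.0.0.1:8001")
metadata = client.get_model_metadata(model_name="nvidia-ocrnet")
for out in metadata.outputs:
    print(out.name, out.datatype, out.shape)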
config_preprocess.txt
[property]
enable=1
target-unique-ids=1
process-on-frame=1
# if enabled maintain the aspect ratio while scaling
maintain-aspect-ratio=1
# if enabled pad symmetrically with maintain-aspect-ratio enabled
symmetric-padding=1
# processing width/height at which image scaled
processing-width=100
processing-height=32
scaling-buf-pool-size=6
tensor-buf-pool-size=6
# 0=NCHW, 1=NHWC, 2=CUSTOM
network-input-order=0
# tensor shape based on network-input-order
network-input-shape=32;1;32;100
# 0=RGB, 1=BGR, 2=GRAY
network-color-format=2
# 0=FP32, 1=UINT8, 2=INT8, 3=UINT32, 4=INT32, 5=FP16
tensor-data-type=0
tensor-name=input
# 0=NVBUF_MEM_DEFAULT 1=NVBUF_MEM_CUDA_PINNED 2=NVBUF_MEM_CUDA_DEVICE 3=NVBUF_MEM_CUDA_UNIFIED
scaling-pool-memory-type=0
# 0=NvBufSurfTransformCompute_Default 1=NvBufSurfTransformCompute_GPU 2=NvBufSurfTransformCompute_VIC
scaling-pool-compute-hw=0
# Scaling Interpolation method
# 0=NvBufSurfTransformInter_Nearest 1=NvBufSurfTransformInter_Bilinear 2=NvBufSurfTransformInter_Algo1
# 3=NvBufSurfTransformInter_Algo2 4=NvBufSurfTransformInter_Algo3 5=NvBufSurfTransformInter_Algo4
# 6=NvBufSurfTransformInter_Default
scaling-filter=0
custom-lib-path=/opt/nvidia/deepstream/deepstream/lib/gst-plugins/libcustom2d_preprocess.so
custom-tensor-preparation-function=CustomTensorPreparation
output-tensor-meta=1
[user-configs]
pixel-normalization-factor=0.00784313
#mean-file=
offsets=127.5
[group-0]
src-ids=0
custom-input-transformation-function=CustomAsyncTransformation
process-on-roi=1
roi-params-src-0=85;121;235;61
draw-roi=1
roi-color=1;1;1;1
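For reference, pixel-normalization-factor=0.00784313 is roughly 1/127.5, so together with offsets=127.5 the input pixels should be mapped from [0, 255] to approximately [-1, 1]. A quick check, assuming the custom lib applies out = factor * (in - offset) the same way Gst-nvinfer does:

# Sketch: verify the normalization, assuming out = factor * (in - offset).
factor, offset = 0.00784313, 127.5
for pixel in (0, 127.5, 255):
    print(pixel, "->", round(factor * (pixel - offset), 4))
# prints: 0 -> -1.0, 127.5 -> 0.0, 255 -> 1.0 (approximately)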
pgie_conf.txt
infer_config {
  unique_id: 1
  gpu_ids: [0]
  max_batch_size: 32
  backend {
    triton {
      model_name: "nvidia-ocrnet"
      version: -1
      grpc {
        url: "127.0.0.1:8001"
        enable_cuda_buffer_sharing: true
      }
    }
  }
  input_tensor_from_meta {
    is_first_dim_batch: true
  }
  #preprocess {
  #  network_format: IMAGE_FORMAT_GRAY
  #  tensor_order: TENSOR_ORDER_NONE
  #  normalize {
  #    scale_factor: 0.00784313
  #  }
  #}
  postprocess {
    classification {
      threshold: 0.2
      custom_parse_classifier_func: "NvDsInferParseOCRNetCTC"
    }
  }
  extra {
    copy_input_to_host_buffers: false
    output_buffer_pool_size: 6
  }
  custom_lib {
    path: "/apps/custom_lib/nvocr/nvinfer_ocrnet_parser.so"
  }
}
input_control {
  process_mode: PROCESS_MODE_FULL_FRAME
  interval: 0
}
output_control {
  output_tensor_meta: true
}
custom_parse_classifier_func
#include <string>
#include <vector>
#include <iostream>
#include <cstring>
#include <type_traits>  // std::extent, used to size the hardcoded dictionary

#include "nvdsinfer_custom_impl.h"

using std::string;
using std::vector;

static bool ocr_dict_ready = false;
static std::vector<string> ocr_dict_table;
/* C-linkage to prevent name-mangling */
extern "C" bool NvDsInferParseOCRNetCTC(
    std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
    NvDsInferNetworkInfo const &networkInfo, float classifierThreshold,
    std::vector<NvDsInferAttribute> &attrList, std::string &attrString);

extern "C" bool NvDsInferParseOCRNetCTC(
    std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
    NvDsInferNetworkInfo const &networkInfo, float classifierThreshold,
    std::vector<NvDsInferAttribute> &attrList, std::string &attrString)
{
    NvDsInferAttribute OCR_attr;

    if (!ocr_dict_ready) {
        static const char* hardcodedOCRDict[] = {
            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
            "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
            "u", "v", "w", "x", "y", "z"
        };
        ocr_dict_table.emplace_back("CTCBlank");  // index 0 is the CTC blank
        for (size_t i = 0; i < std::extent<decltype(hardcodedOCRDict)>::value; ++i) {
            ocr_dict_table.emplace_back(hardcodedOCRDict[i]);
        }
        ocr_dict_ready = true;
    }
    if (outputLayersInfo.size() != 3) {
        std::cerr << "Mismatch in the number of output buffers. "
                  << "Expected 3 output buffers, got "
                  << outputLayersInfo.size() << std::endl;
        return false;
    }
    auto layerFinder = [&outputLayersInfo](const std::string &name)
        -> const NvDsInferLayerInfo * {
        for (auto &layer : outputLayersInfo) {
            if (layer.layerName && name == layer.layerName) {
                return &layer;
            }
        }
        return nullptr;
    };

    const NvDsInferLayerInfo *output_id = layerFinder("output_id");
    const NvDsInferLayerInfo *output_prob = layerFinder("output_prob");
    const NvDsInferLayerInfo *_798 = layerFinder("798");
    if (!output_id || !output_prob || !_798) {
        if (!output_id) {
            std::cerr << " - output_id: missing output layer." << std::endl;
        }
        if (!output_prob) {
            std::cerr << " - output_prob: missing output layer." << std::endl;
        }
        if (!_798) {
            std::cerr << " - 798: missing output layer." << std::endl;
        }
        return false;
    }
    if (output_id->inferDims.numDims != 1U) {
        std::cerr << "Network output_id has " << output_id->inferDims.numDims
                  << " dims, expected 1" << std::endl;
        return false;
    }
    if (output_prob->inferDims.numDims != 1U) {
        std::cerr << "Network output_prob has " << output_prob->inferDims.numDims
                  << " dims, expected 1" << std::endl;
        return false;
    }
    if (_798->inferDims.numDims != 1U) {
        std::cerr << "Network 798 has " << _798->inferDims.numDims
                  << " dims, expected 1" << std::endl;
        return false;
    }
    // The parser is invoked per tensor unit, so a single sequence is decoded here.
    int batch_size = 1;
    int output_len = output_prob->inferDims.d[0];
    //std::cout << "Batch size: " << batch_size << std::endl;
    //std::cout << "Output length: " << output_len << std::endl;
    //std::cout << "networkInfo.width: " << networkInfo.width << std::endl;

    std::vector<std::pair<std::string, float>> temp_de_texts;
    int *output_id_data = reinterpret_cast<int*>(output_id->buffer);
    float *output_prob_data = reinterpret_cast<float*>(output_prob->buffer);
    for (int batch_idx = 0; batch_idx < batch_size; ++batch_idx) {
        int b_offset = batch_idx * output_len;
        // Greedy CTC decode step 1: collapse consecutive repeated IDs.
        int prev = output_id_data[b_offset];
        std::vector<int> temp_seq_id = {prev};
        std::vector<float> temp_seq_prob = {output_prob_data[b_offset]};
        for (int i = 1; i < output_len; ++i) {
            if (output_id_data[b_offset + i] != prev) {
                temp_seq_id.push_back(output_id_data[b_offset + i]);
                temp_seq_prob.push_back(output_prob_data[b_offset + i]);
                prev = output_id_data[b_offset + i];
            }
        }
        // Step 2: drop CTC blanks (ID 0) and map the rest through the dictionary.
        std::string de_text = "";
        float prob = 1.0;
        for (size_t i = 0; i < temp_seq_id.size(); ++i) {
            if (temp_seq_id[i] != 0) {
                if (temp_seq_id[i] <= static_cast<int>(ocr_dict_table.size()) - 1) {
                    de_text += ocr_dict_table[temp_seq_id[i]];
                    prob *= temp_seq_prob[i];
                } else {
                    std::cerr << "[ERROR] Character dict is not compatible with OCRNet TRT engine." << std::endl;
                }
            }
        }
        temp_de_texts.emplace_back(std::make_pair(de_text, prob));
    }
attrString = "";
for (const auto& temp_text : temp_de_texts) {
if (temp_text.second >= classifierThreshold) {
attrString += temp_text.first;
}
//std::cout << "Decoded text: " << temp_text.first << ", Probability: " << temp_text.second << ", Threshold: " << classifierThreshold << std::endl;
}
OCR_attr.attributeIndex = 0;
OCR_attr.attributeValue = 1;
OCR_attr.attributeLabel = strdup(attrString.c_str());
OCR_attr.attributeConfidence = 1.0;
for (const auto& temp_text : temp_de_texts) {
OCR_attr.attributeConfidence *= temp_text.second;
}
// std::cout << "attributeIndex: " << OCR_attr.attributeIndex << std::endl;
// std::cout << "attributeValue: " << OCR_attr.attributeValue << std::endl;
// std::cout << "attributeLabel: " << OCR_attr.attributeLabel << std::endl;
// std::cout << "attributeConfidence: " << OCR_attr.attributeConfidence << std::endl;
attrList.push_back(OCR_attr);
return true;
}
CHECK_CUSTOM_CLASSIFIER_PARSE_FUNC_PROTOTYPE(NvDsInferParseOCRNetCTC);
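To double-check the greedy CTC decode in the parser (collapse consecutive repeats, then drop the blank at index 0), here is a small standalone Python sanity check I used; the sample IDs are made up:

# Sketch: greedy CTC decode mirroring the parser logic above.
dict_table = ["CTCBlank"] + list("0123456789abcdefghijklmnopqrstuvwxyz")

def ctc_decode(ids):
    # Collapse consecutive repeats, then drop CTC blanks (index 0).
    collapsed = [ids[0]] + [c for p, c in zip(ids, ids[1:]) if c != p]
    return "".join(dict_table[i] for i in collapsed if i != 0)

print(ctc_decode([12, 12, 0, 11, 0, 0, 28, 28]))  # hypothetical IDs -> "bar"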