Thanks @fanzh for the quick reply.
I was trying to implement this using the Triton Python backend with DeepStream nvinferserver.
I changed part of the code so that, instead of random numbers in the request, the input is a batch of frames as tensors coming from my DeepStream pipeline.
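For context, the tensor the model receives per request should be a batch of frames matching the INPUT0 entry in the config.pbtxt further below; a minimal numpy sketch of that layout (the batch size of 4 is only an example):

import numpy as np

# Illustrative only: a batch of 4 frames matching INPUT0 dims [480, 640, 3]
batch_of_frames = np.zeros((4, 480, 640, 3), dtype=np.float32)
print(batch_of_frames.shape)  # (4, 480, 640, 3)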
I am getting errors. The error output is:
ERROR: infer_trtis_server.cpp:268 Triton: TritonServer response error received., triton_err_str:Internal, err_msg:Python model 'centerface_0' is using the decoupled mode and the execute function must return None.
ERROR: infer_trtis_backend.cpp:629 Triton server failed to parse response with request-id:0 model:
ERROR: infer_trtis_backend.cpp:372 failed to specify dims after running inference failed on model:centerface, nvinfer error:NVDSINFER_TRITON_ERROR
0:00:12.139632808 29 0x39a4ca0 ERROR nvinferserver gstnvinferserver.cpp:408:gst_nvinfer_server_logger:<primary-inference> nvinferserver[UID 1]: Error in specifyBackendDims() <infer_trtis_context.cpp:204> [UID = 1]: failed to specify input dims triton backend for model:centerface, nvinfer error:NVDSINFER_TRITON_ERROR
This is the content of model.py in the Python backend:
import asyncio

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    async def execute(self, requests):
        processed_requests = []
        async_tasks = []
        for request in requests:
            frame_tensors = pb_utils.get_input_tensor_by_name(
                request, "INPUT0"
            ).as_numpy()
            for frame_tensor in frame_tensors:
                frame = frame_tensor[0]
                # if frame < 0:
                #     self.raise_value_error(requests)
                async_tasks.append(asyncio.create_task(asyncio.sleep(1)))
            processed_requests.append(
                {
                    "response_sender": request.get_response_sender(),
                    "batch_size": frame_tensors.shape[0],
                }
            )
        # This decoupled execute should be scheduled to run in the background
        # concurrently with other instances of decoupled execute, as long as the
        # event loop is not blocked.
        await asyncio.gather(*async_tasks)
        for p_req in processed_requests:
            response_sender = p_req["response_sender"]
            batch_size = p_req["batch_size"]
            stats = np.array([[10, 10, 100, 100, 3]])
            stats = np.tile(stats, (batch_size, 1, 1))
            # logger.log_warn(f"{stats.shape}")
            stats = stats.astype(np.float32)
            shape = np.array([int(stats.shape[1]), int(stats.shape[2])])
            shape = np.tile(shape, (batch_size, 1, 1))  # batch size
            shape = shape.astype(np.float32)
            out_tensor = pb_utils.Tensor("OUTPUT0", stats)
            out_tensor2 = pb_utils.Tensor("OUTPUT1", shape)
            # responses.append(pb_utils.InferenceResponse([out_tensor, out_tensor2]))
            # output_tensors = pb_utils.Tensor(
            #     "OUTPUT0", np.array([0 for i in range(batch_size)], np.float32)
            # )
            response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor, out_tensor2]
            )
            response_sender.send(
                response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
            )
        print('B Here I AM')
        return None

    def raise_value_error(self, requests):
        # TODO: Model may raise exception without sending complete final
        for request in requests:
            response_sender = request.get_response_sender()
            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        raise ValueError("wait_secs cannot be negative")
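For clarity, here are the shapes of the per-request tensors built in the response loop above (a standalone numpy check run outside Triton; the batch size of 4 is only an example):

import numpy as np

batch_size = 4  # example, matches max_batch_size in the config below
stats = np.tile(np.array([[10, 10, 100, 100, 3]]), (batch_size, 1, 1)).astype(np.float32)
# [1, 5] mirrors [stats.shape[1], stats.shape[2]] from model.py
shape = np.tile(np.array([1, 5]), (batch_size, 1, 1)).astype(np.float32)
print(stats.shape)  # (4, 1, 5) -> sent as OUTPUT0
print(shape.shape)  # (4, 1, 2) -> sent as OUTPUT1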
And this is the config.pbtxt:
name: "centerface"
backend: "python"
max_batch_size: 4
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 480, 640,3 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ -1 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind : KIND_CPU
}
]
parameters: {
key: "FORCE_CPU_ONLY_INPUT_TENSORS"
value: {
string_value:"yes"
}
}
model_transaction_policy { decoupled: true }
Also, by "pb" I meant the Python backend, not protobuf.
Please let me know how to fix this issue.