Hello @fanzh
Many thanks, that solved the configuration problem. But I’m facing a new problem now, and I’m not sure whether it is model-related or a problem with the Triton TensorFlow backend.
The model is expected to output a [-1, 16] tensor. The -1 is a dynamic value that is determined by the number of objects detected.
As an example, take the photo below:
For this image, the model outputs a tensor of size [25, 16], meaning it detected 25 faces.
However, when using Triton, for some reason the model always outputs only one detection, i.e. a tensor of size [1, 16].
Here’s the configuration file I’m using now:
name: "retinaface-tf"
platform: "tensorflow_savedmodel"
backend: "tensorflow"
# The leading dimension of this model's output is the number of detections,
# not a batch dimension, so Triton-managed batching must be disabled.
# With max_batch_size > 0 Triton treats the first dimension of every tensor
# as the batch dimension and prepends it to each `dims` entry below.
max_batch_size: 0
input [
  {
    name: "input_image"
    data_type: TYPE_FP32
    # Full channels-last shape, including the leading image-count dimension.
    dims: [ -1, -1, -1, 3 ]
  }
]
output [
  {
    name: "tf_op_layer_GatherV2"
    data_type: TYPE_FP32
    # One row of 16 values per detected face.
    dims: [ -1, 16 ]
  }
]
Here’s the script I’m using to run the client:
import tritonclient.http as tritonhttpclient
import numpy as np
from PIL import Image
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import os
import numpy as np
import tensorflow as tf
import time
from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
pad_input_image, recover_pad_output)
# Triton: ===============
# Connection and tensor settings for the Triton HTTP client.
VERBOSE = False

# Server endpoint and the model/version to query.
url = "0.0.0.0:8000"
model_name = "retinaface-tf"
model_version = "1"

# Names and types of the tensors exchanged with the served model.
input_name = "input_image"
input_dtype = "FP32"
input_shape = (1, 640, 640, 3)  # the served model declares (-1, -1, -1, 3)
output_name = "tf_op_layer_GatherV2"
# Model ===============
# Read the network configuration from the YAML file.
cfg = load_yaml("./configs/retinaface_res50.yaml")

# Build the network with training disabled.
model = RetinaFaceModel(
    cfg,
    training=False,
    iou_th=0.4,
    score_th=0.5,
)

# Restore the most recent checkpoint; stop the script if none is found.
checkpoint_dir = './checkpoints/' + cfg['sub_name']
checkpoint = tf.train.Checkpoint(model=model)
latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
if not latest_ckpt:
    print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
    exit()
checkpoint.restore(latest_ckpt)
print("[*] load ckpt from {}.".format(latest_ckpt))
# Image ===============
image_path = './data/0_Parade_marchingband_1_149.jpg'
img_raw = cv2.imread(image_path)
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message instead of an
# AttributeError on `.shape` below.
if img_raw is None:
    raise FileNotFoundError("Cannot read image: {}".format(image_path))

# Keep the original size; it is passed to draw_bbox_landm when drawing.
img_height_raw, img_width_raw, _ = img_raw.shape

# Convert to float32, resize to 640x640 and switch from BGR to RGB.
img = img_raw.astype(np.float32)
img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_LINEAR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# pad input image to avoid unmatched shape problem
img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
print(np.shape(img))

# Add a leading axis so the tensor matches the model's 4-D input.
image_numpy = img[np.newaxis, ...]
print(np.shape(image_numpy))
# Connect to the Triton server and print the configuration it loaded.
triton_client = tritonhttpclient.InferenceServerClient(url=url, verbose=VERBOSE)
model_metadata = triton_client.get_model_metadata(
    model_name=model_name, model_version=model_version)
model_config = triton_client.get_model_config(
    model_name=model_name, model_version=model_version)
print(model_config)

# Declare the input with the shape of the tensor actually being sent rather
# than a hard-coded constant: the declared shape has to agree with the data,
# and padding or a different image size would change it.
input0 = tritonhttpclient.InferInput(
    input_name, list(image_numpy.shape), input_dtype)
input0.set_data_from_numpy(image_numpy, binary_data=False)

# Request the single detection tensor and run inference.
output = tritonhttpclient.InferRequestedOutput(output_name, binary_data=False)
response = triton_client.infer(model_name, model_version=model_version,
                               inputs=[input0], outputs=[output])

# Convert the result to a float32 array and print its shape for inspection.
logits = np.asarray(response.as_numpy(output_name), dtype=np.float32)
print(logits.shape)
print(response)
# recover padding effect
outputs = recover_pad_output(logits, pad_params)
print(outputs.shape)

# draw every detection onto the original image
for detection in outputs:
    draw_bbox_landm(img_raw, detection, img_height_raw, img_width_raw)

# cv2.imwrite does not create missing directories and reports failure only
# through its return value, so create the folder first and check the result.
output_path = "./outputs/image.jpg"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
if cv2.imwrite(output_path, img_raw):
    print(f"[*] save result at {output_path}")
else:
    print(f"[*] failed to save result at {output_path}")
Here’s my model structure:
.
├── retinaface-tf
│ ├── 1
│ │ ├── config.pbtxt
│ │ └── model.savedmodel
│ │ ├── assets
│ │ ├── saved_model.pb
│ │ └── variables
│ │ ├── variables.data-00000-of-00001
│ │ └── variables.index
Here’s the link to the model, uploaded to Google Drive: retinaface-tf - Google Drive
When performing a health check on the model, here’s the output:
yousef@yousef-Dell-G15-5510:~/Desktop/Triton-models/retinaface-tf$ curl -v 0.0.0.0:8000/v2/models/retinaface-tf
* Trying 0.0.0.0:8000...
* TCP_NODELAY set
* Connected to 0.0.0.0 (127.0.0.1) port 8000 (#0)
> GET /v2/models/retinaface-tf HTTP/1.1
> Host: 0.0.0.0:8000
> User-Agent: curl/7.68.0
> Accept: */*
>
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< Content-Type: application/json
< Content-Length: 227
<
* Connection #0 to host 0.0.0.0 left intact
{"name":"retinaface-tf","versions":["1"],"platform":"tensorflow_savedmodel","inputs":[{"name":"input_image","datatype":"FP32","shape":[-1,-1,-1,3]}],"outputs":[{"name":"tf_op_layer_GatherV2","datatype":"FP32","shape":[-1,16]}]}
Kindly advise, thank you.