Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU): Jetson TX2
• DeepStream Version: 5.0-20.07
• JetPack Version (valid for Jetson only): 4.4 [L4T 32.4.3]
• TensorRT Version: 7.1.3.0
I have converted a TensorFlow MobileNet network to a UFF model using the following procedure:
- Create a TensorRT-compatible TensorFlow graph_def using the tf_trt_models code (see the sketch after this list).
- Convert to UFF using the code:
import uff

_ = uff.from_tensorflow(
    graph_def,
    output_nodes=output_names,
    output_filename="mobilenet.uff",
    text=True,
    debug_mode=True,
)
- Create the engine (.bin) file using the code:
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.INFO)

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    builder.max_workspace_size = 1 << 28
    builder.max_batch_size = 1
    builder.fp16_mode = True
    parser.register_input("input", (3, 224, 224))
    for output_name in output_names:
        print(f"Registered output {output_name}")
        parser.register_output(output_name)
    parser.parse("mobilenet.uff", network)
    engine = builder.build_cuda_engine(network)
    buf = engine.serialize()
    with open("mobilenet.bin", "wb") as f:
        f.write(buf)
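For reference, the graph_def in step 1 was produced with the classification helper from the tf_trt_models repository, roughly as below (a minimal sketch; the model name, checkpoint path and class count are placeholders for my actual training setup, so treat them as assumptions):

from tf_trt_models.classification import build_classification_graph

# Build a frozen, TensorRT-compatible graph_def from a TF-slim checkpoint.
# Model name, checkpoint path and num_classes below are placeholders.
graph_def, input_names, output_names = build_classification_graph(
    model="mobilenet_v1_1p0_224",
    checkpoint="model.ckpt",
    num_classes=2,
)
print(input_names, output_names)  # expected to match the 'input' / 'scores' blob names used above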
Then I tested the model-engine-file (mobilenet.bin) using this Python code:
import numpy as np
import pycuda.autoinit  # creates the CUDA context
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image


class TrtMobilenet(object):
    def _load_engine(self):
        with open(self.model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def _create_context(self):
        # Allocate host/device buffers for every binding of the engine.
        for binding in self.engine:
            size = (
                trt.volume(self.engine.get_binding_shape(binding))
                * self.engine.max_batch_size
            )
            host_mem = cuda.pagelocked_empty(size, np.float32)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(cuda_mem))
            if self.engine.binding_is_input(binding):
                self.host_inputs.append(host_mem)
                self.cuda_inputs.append(cuda_mem)
            else:
                self.host_outputs.append(host_mem)
                self.cuda_outputs.append(cuda_mem)
        return self.engine.create_execution_context()

    def __init__(self, model_path, input_shape):
        """Initialize TensorRT plugins, engine and context."""
        self.model_path = model_path
        self.input_shape = input_shape
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self.engine = self._load_engine()
        self.host_inputs = []
        self.cuda_inputs = []
        self.host_outputs = []
        self.cuda_outputs = []
        self.bindings = []
        self.stream = cuda.Stream()
        self.context = self._create_context()

    def __del__(self):
        """Free CUDA memories."""
        del self.stream
        del self.cuda_outputs
        del self.cuda_inputs

    def read(self, path):
        """Read and resize image."""
        img = Image.open(path).resize(self.input_shape)
        return np.asarray(img)

    def preprocess(self, img):
        # HWC -> CHW, keeping the raw 0-255 pixel values
        img = img.transpose((2, 0, 1)).astype(np.float32)
        # no normalization needed
        # img *= 2.0 / 255.0
        # img -= 1.0
        return img

    def detect(self, path):
        """Detect objects in the input image."""
        img_resized = self.read(path)
        img_resized = self.preprocess(img_resized)
        np.copyto(self.host_inputs[0], img_resized.ravel())
        cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], self.stream)
        self.context.execute_async(
            batch_size=1, bindings=self.bindings, stream_handle=self.stream.handle
        )
        cuda.memcpy_dtoh_async(self.host_outputs[0], self.cuda_outputs[0], self.stream)
        self.stream.synchronize()
        output = self.host_outputs[0]
        return img_resized, output

model = TrtMobilenet("mobilenet.bin", (224, 224))
img, scores = model.detect("frame.jpg")
It works as expected, returning exactly the same results as the original TensorFlow model.
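For completeness, the TensorFlow reference values I compared against were obtained roughly like this (a sketch run against the frozen graph_def from step 1; the "input:0" / "scores:0" tensor names are assumptions matching the blobs registered above):

import numpy as np
import tensorflow as tf
from PIL import Image

# Run the original frozen graph on the same image and compare it with the
# TensorRT output (tensor names assumed to match the registered blobs).
with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    with tf.Session(graph=graph) as sess:
        img = np.asarray(Image.open("frame.jpg").resize((224, 224)), dtype=np.float32)
        tf_scores = sess.run("scores:0", feed_dict={"input:0": img[np.newaxis, ...]})

print(np.allclose(tf_scores.ravel(), scores, atol=1e-3))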
Finally, I have integrated this model into DeepStream using the following pipeline:
gst-launch-1.0 multifilesrc location=${images} caps="image/jpeg,framerate=1/1" ! \
jpegparse ! \
nvv4l2decoder ! \
nvvideoconvert ! \
'video/x-raw(memory:NVMM),format=(string)NV12' ! \
mux.sink_0 nvstreammux live-source=0 name=mux batch-size=1 width=224 height=224 ! \
nvinfer config-file-path=mobilenet.txt batch-size=1 process-mode=1 ! \
nvstreamdemux name=demux demux.src_0 ! \
nvvideoconvert ! \
nvdsosd ! \
nvvideoconvert ! \
nvv4l2h265enc ! \
h265parse ! \
qtmux ! \
filesink location=detections.mp4
and its corresponding mobilenet.txt configuration file:
[property]
gpu-id=0
net-scale-factor=1.0
uff-file=mobilenet.uff
model-engine-file=mobilenet.bin
input-dims=3;224;224;0
uff-input-blob-name=input
output-blob-names=scores
labelfile-path=labels.txt
num-detected-classes=2
batch-size=2
model-color-format=1
network-mode=2
is-classifier=1
process-mode=1
classifier-async-mode=0
classifier-threshold=0.
operate-on-gie-id=1
gie-unique-id=4
#parse-classifier-func-name=NvDsInferClassiferParseCustomSoftmax
#custom-lib-path=/opt/nvidia/deepstream/deepstream-5.0/lib/libnvds_infercustomparser.so
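For what it is worth, my understanding from the nvinfer documentation is that the plugin preprocesses each frame as y = net-scale-factor * (x - mean), so with net-scale-factor=1.0 and no offsets/mean file the raw 0-255 pixel values should reach the network unchanged, only converted to the format given by model-color-format (1 = BGR) and to CHW layout. In Python terms I would expect something equivalent to this sketch (my own approximation, not DeepStream code):

import numpy as np

def expected_nvinfer_preprocess(rgb_hwc, net_scale_factor=1.0, offsets=(0.0, 0.0, 0.0)):
    """My approximation of nvinfer's preprocessing for the config above:
    RGB -> BGR (model-color-format=1), HWC -> CHW, y = scale * (x - mean)."""
    bgr = rgb_hwc[..., ::-1].astype(np.float32)
    chw = bgr.transpose((2, 0, 1))
    mean = np.asarray(offsets, dtype=np.float32).reshape(3, 1, 1)
    return net_scale_factor * (chw - mean)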
However, the softmax probabilities coming out of DeepStream are different from the Python test and wrong. What am I doing wrong?