Inference time on Jetson Nano

I have trained an image classification model with the TensorFlow library and converted it to .onnx format for inference. I then optimized it with TensorRT and ran inference using the code below.
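(For reference, an export of this kind can be produced with the tf2onnx Python API, roughly as in the sketch below; the SavedModel path, opset and input name here are placeholders rather than the exact values I used.)

    # Conversion sketch (assumes a trained Keras model and the tf2onnx package;
    # the path, opset and tensor name are placeholders).
    import tensorflow as tf
    import tf2onnx

    model = tf.keras.models.load_model("saved_model_dir")                 # trained classifier
    spec = (tf.TensorSpec((1, 512, 512, 3), tf.float32, name="input"),)   # fixed NHWC input
    tf2onnx.convert.from_keras(model, input_signature=spec, opset=13,
                               output_path="512_resnet.onnx")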

# This sample uses an ONNX model to create a TensorRT inference engine.
from random import randint
from matplotlib import pyplot as plt # Additional statement for showing image
from PIL import Image
import numpy as np
import timeit

import pycuda.driver as cuda
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit

import tensorrt as trt

import sys, os
configfile = '/usr/src/tensorrt/samples/python/common.py'
sys.path.append(os.path.dirname(os.path.expanduser(configfile)))
import common

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class ModelData(object):
    MODEL_FILE = "512_resnet.onnx"
    INPUT_NAME = "input"
    INPUT_SHAPE = (1, 512, 512, 3)
    OUTPUT_NAME = "dense_1"

“”"def build_engine(model_file):
For more information on TRT basics, refer to the introductory samples.

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    builder.max_workspace_size = common.GiB(1)
    # Parse the Uff Network
    parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
    parser.register_output(ModelData.OUTPUT_NAME)
    parser.parse(model_file, network)
    # Build and return an engine.
    return builder.build_cuda_engine(network)"""

# The ONNX path is used for ONNX models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        # builder.max_workspace_size = common.GiB(1)
        # added code
        config = builder.create_builder_config()
        config.max_workspace_size = common.GiB(1)
        profile = builder.create_optimization_profile()
        profile.set_shape_input("input", (1, 512, 512, 3), (1, 512, 512, 3), (1, 512, 512, 3))
        profile.set_shape("input", (1, 512, 512, 3), (1, 512, 512, 3), (1, 512, 512, 3))
        config.add_optimization_profile(profile)
        # end added code
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        network.get_input(0).shape = [1, 512, 512, 3]
        return builder.build_engine(network, config)

# Loads a test case into the provided pagelocked_buffer.

def load_normalized_test_case(data_paths, pagelocked_buffer, case_num=7):
    [test_case_path] = locate_files(data_paths, [str(case_num) + ".JPG"])
    # Flatten the image into a 1D array, normalize, and copy to pagelocked memory.
    img = np.array(Image.open(test_case_path)).ravel()
    plt.imshow(img.reshape(512, 512, 3))  # Additional statement for showing image
    plt.show()  # Additional statement for showing image
    # np.copyto(pagelocked_buffer, 1.0 - img / 255.0)
    np.copyto(pagelocked_buffer, img)
    return case_num

def locate_files(data_paths, filenames, err_msg=""):
    """
    Locates the specified files in the specified data directories.
    If a file exists in multiple data directories, the first directory is used.

    Args:
        data_paths (List[str]): The data directories.
        filenames (List[str]): The names of the files to find.

    Returns:
        List[str]: The absolute paths of the files.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    found_files = [None] * len(filenames)
    for data_path in data_paths:
        # Find all requested files.
        for index, (found, filename) in enumerate(zip(found_files, filenames)):
            if not found:
                print(" data path ", data_path)
                print(" filename ", filename)
                file_path = os.path.abspath(os.path.join(data_path, filename))
                print(" file_path ", file_path)
                if os.path.exists(file_path):
                    found_files[index] = file_path

    # Check that all files were found.
    for f, filename in zip(found_files, filenames):
        if not f or not os.path.exists(f):
            raise FileNotFoundError("Could not find {:}. Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg))
    return found_files

def main():
    data_paths, _ = common.find_sample_data(description="Runs an MNIST network using an ONNX model file", subfolder="AJAX")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)

    print("Parsing ONNX file and building engine...Model file = ", model_file)
    # engine = build_engine_onnx(model_file)
    # if engine is None:
    #     print("Engine creation failed. Exiting...")
    #     quit()

    print("Serializing engine and writing file...")
    # serialized_engine = engine.serialize()
    # with open('./models/serialized_engine', 'wb') as f:
    #     f.write(serialized_engine)

    # Read the serialized engine from file and deserialize it.
    with open('./models/serialized_engine', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    # Allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    with engine.create_execution_context() as context:
        case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
        # For more information on performing inference, refer to the introductory samples.
        # common.do_inference_v2 returns a list of outputs - we only have one in this case.
        start_time = timeit.default_timer()
        [output] = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        prediction_time = timeit.default_timer() - start_time
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
        print("Prediction time: " + str(prediction_time))

if __name__ == '__main__':
    main()
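For what it's worth, one-time start-up cost can be separated from steady-state latency by timing repeated calls after a warm-up run. A measurement sketch, reusing the context, buffers and stream created in main() above:

    # Warm-up runs absorb one-time CUDA/TensorRT initialisation costs.
    for _ in range(5):
        common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)

    # Average the latency over repeated runs.
    runs = 20
    start = timeit.default_timer()
    for _ in range(runs):
        common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    print("Average inference time:", (timeit.default_timer() - start) / runs)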

The problem is the inference time: the model takes around 60-90 seconds for a single image prediction.

How can I optimize the code or the model to bring down the inference time?

My ONNX model file is around 97 MB.

What is the maximum model size we can run on a Jetson Nano 2GB with reasonable inference time?

Dear @deepak11.iitb,
You may use low-precision models (like FP16 or INT8) to improve the inference time quickly. Please check the Developer Guide :: NVIDIA Deep Learning TensorRT Documentation.
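For example, FP16 can be requested on the builder config used in build_engine_onnx above; a minimal sketch (whether the lower precision is acceptable depends on the model):

    # Inside build_engine_onnx, after creating the builder config:
    if builder.platform_has_fast_fp16:          # Jetson Nano's GPU has fast FP16 support
        config.set_flag(trt.BuilderFlag.FP16)   # let TensorRT choose FP16 kernels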
