Inference time on jetson nano

deepak11.iitb · May 29, 2022, 8:13pm

I have trained an image classification model using the TensorFlow library and converted the model to .onnx format for inference. I used the following code to optimise the model with TensorRT and then to run inference.

# This sample uses an ONNX model to create a TensorRT Inference Engine
from random import randint
from matplotlib import pyplot as plt # Additional statement for showing image
from PIL import Image
import numpy as np
import timeit

import pycuda.driver as cuda
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit

import tensorrt as trt

import sys
import os

# Make the helper module shipped with the TensorRT Python samples importable:
# the directory containing common.py is appended to the module search path.
configfile = '/usr/src/tensorrt/samples/python/common.py'
sys.path.append(os.path.dirname(os.path.expanduser(configfile)))
import common

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class ModelData(object):
    """Constants describing the ONNX model used by this script."""

    # File name of the ONNX model; main() joins it onto the model directory.
    MODEL_FILE = "512_resnet.onnx"
    # Name of the network input tensor.
    INPUT_NAME = "input"
    # Shape of the network input tensor.
    # NOTE(review): presumably (batch, height, width, channels) - confirm
    # against the exported model.
    INPUT_SHAPE = (1, 512, 512, 3)
    # Name of the network output tensor.
    OUTPUT_NAME = "dense_1"

"""def build_engine(model_file):
For more information on TRT basics, refer to the introductory samples.

with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    builder.max_workspace_size = common.GiB(1)
    # Parse the Uff Network
    parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
    parser.register_output(ModelData.OUTPUT_NAME)
    parser.parse(model_file, network)
    # Build and return an engine.
    return builder.build_cuda_engine(network)"""

# The ONNX path is used for ONNX models.
def build_engine_onnx(model_file, fp16=False):
    """Parse an ONNX model file and build a TensorRT engine from it.

    Args:
        model_file (str): Path to the ONNX model file.
        fp16 (bool): When True, set the FP16 builder flag so TensorRT may
            choose half-precision kernels. Defaults to False, which keeps
            the builder's default precision.

    Returns:
        The result of ``builder.build_engine(network, config)``, or None if
        the ONNX parser reported errors (each error is printed first).
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(common.EXPLICIT_BATCH) as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        config.max_workspace_size = common.GiB(1)
        if fp16:
            config.set_flag(trt.BuilderFlag.FP16)

        # The input has a single fixed shape, so min == opt == max.
        # Only set_shape() is used: set_shape_input() supplies *values* for
        # shape tensors, and the image input is an ordinary data tensor.
        input_shape = ModelData.INPUT_SHAPE
        profile = builder.create_optimization_profile()
        profile.set_shape(ModelData.INPUT_NAME, input_shape, input_shape, input_shape)
        config.add_optimization_profile(profile)

        with open(model_file, "rb") as model:
            if not parser.parse(model.read()):
                for error_index in range(parser.num_errors):
                    print(parser.get_error(error_index))
                return None

        # Pin the network input to the fixed shape before building.
        network.get_input(0).shape = list(input_shape)
        return builder.build_engine(network, config)

# Loads a test case into the provided pagelocked_buffer.

def load_normalized_test_case(data_paths, pagelocked_buffer, case_num=7):
    """Load test image ``<case_num>.JPG`` into ``pagelocked_buffer``.

    The image is located with locate_files(), displayed with matplotlib, and
    its flattened pixel values are copied into the buffer. Despite the
    function name, no normalization is applied: the ``1.0 - img / 255.0``
    scaling from the original sample is disabled, so raw pixel values are
    copied as-is.

    Args:
        data_paths (List[str]): Directories searched for the image.
        pagelocked_buffer: Destination array passed to ``np.copyto``.
        case_num (int): Number used to form the image file name.

    Returns:
        int: ``case_num``, unchanged.

    Raises:
        FileNotFoundError: If the image is not found in ``data_paths``.
    """
    [test_case_path] = locate_files(data_paths, [str(case_num) + ".JPG"])
    # Read the pixels inside a context manager so the image file is closed
    # as soon as the data has been copied into a NumPy array.
    with Image.open(test_case_path) as image:
        img = np.array(image).ravel()
    plt.imshow(img.reshape(512, 512, 3))  # Additional statement for showing image
    plt.show()  # Additional statement for showing image
    np.copyto(pagelocked_buffer, img)
    return case_num

def locate_files(data_paths, filenames, err_msg=""):
    """
    Locates the specified files in the specified data directories.
    If a file exists in multiple data directories, the first directory is used.

    Args:
        data_paths (List[str]): The data directories.
        filenames (List[str]): The names of the files to find.
        err_msg (str): Extra text appended to the error message when a file
            cannot be located.

    Returns:
        List[str]: The absolute paths of the files, in the order of ``filenames``.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    found_files = [None] * len(filenames)
    for data_path in data_paths:
        # Find all requested files that have not been located yet.
        for index, (found, filename) in enumerate(zip(found_files, filenames)):
            if not found:
                file_path = os.path.abspath(os.path.join(data_path, filename))
                if os.path.exists(file_path):
                    found_files[index] = file_path

    # Check that all files were found
    for f, filename in zip(found_files, filenames):
        if not f or not os.path.exists(f):
            raise FileNotFoundError("Could not find {:}. Searched in data paths: {:}\n{:}".format(filename, data_paths, err_msg))
    return found_files

def main():
    """Load (or build) the TensorRT engine and classify one test image.

    The serialized engine is read from ``./models/serialized_engine`` when
    that file exists. Otherwise the engine is built from the ONNX model with
    build_engine_onnx() and written to that path for later runs. One untimed
    warm-up inference is run before the timed one, so the reported time does
    not include one-off initialization work done on the first execution.

    Exits with status 1 if no engine could be obtained.
    """
    data_paths, _ = common.find_sample_data(description="Runs an MNIST network using an ONNX model file", subfolder="AJAX")
    model_path = os.environ.get("MODEL_PATH") or os.path.join(os.path.dirname(__file__), "models")
    model_file = os.path.join(model_path, ModelData.MODEL_FILE)
    engine_file = './models/serialized_engine'

    if os.path.exists(engine_file):
        # Read engine from file and deserialize
        print("Reading serialized engine from file...")
        with open(engine_file, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
    else:
        print("Parsing ONNX file and building engine...Model file = ", model_file)
        engine = build_engine_onnx(model_file)
        if engine is not None:
            print("Serializing engine and writing file...")
            os.makedirs(os.path.dirname(engine_file), exist_ok=True)
            with open(engine_file, 'wb') as f:
                f.write(engine.serialize())

    if engine is None:
        print("Engine creation failed. Exiting...")
        sys.exit(1)

    # Allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    with engine.create_execution_context() as context:
        case_num = load_normalized_test_case(data_paths, pagelocked_buffer=inputs[0].host)
        # Untimed warm-up run, so the measurement below reflects a repeat
        # inference rather than the first execution of the engine.
        common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        # For more information on performing inference, refer to the introductory samples.
        # The common.do_inference_v2 function will return a list of outputs - we only have one in this case.
        start_time = timeit.default_timer()
        [output] = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        prediction_time = timeit.default_timer() - start_time
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))
        print("Prediction time: " + str(prediction_time))

# Run the sample only when this file is executed as a script.
if __name__ == '__main__':
    main()

The problem is the inference time: the model takes around 60-90 seconds to predict a single image.

How can I optimize the code or the model to bring down the inference time?

My ONNX model file is around 97 MB.

What is the maximum model size that can be run on a Jetson Nano 2GB with a reasonable inference time?

SivaRamaKrishnaNV · May 30, 2022, 2:34pm

Dear @deepak11.iitb,
You can use reduced-precision inference (such as FP16 or INT8) to improve the inference time quickly. Please check the Developer Guide :: NVIDIA Deep Learning TensorRT Documentation.

system · June 29, 2022, 1:46am

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Engine Plan Inference on JetsonTX2 Jetson TX2 tensorrt , python	11	1835	October 18, 2021
Jetson-Inference predictions differ from e.g. tensorflow predictions Jetson Nano jetson-inference	4	853	November 17, 2021
Inference error while using tensorrt engine on jetson nano Jetson Nano tensorrt , nvbugs	23	3520	April 20, 2022
Inference result gets worse when converting pytorch model to TensorRT model TensorRT pytorch	6	1079	January 19, 2022
TensorRT Inference error on Jetson nano TensorRT	3	1180	December 6, 2021
TensorRt inference is taking 1.5 sec to inference a single frame.i want to speed up my inference TensorRT tensorrt , jetson-inference , jetson-nano	1	901	March 13, 2023
Inference time increases rapidly when set a high resolution input image TensorRT tensorrt , cuda , ubuntu	1	744	September 13, 2023
How to load trained onnx model in jetson nano TensorRT tensorrt , jetson-inference , onnx , nano	3	2116	November 17, 2021
Tensorrt8.5 inference different with origin onnx model TensorRT	6	1062	December 13, 2022
Inference time changes after training TensorRT tensorrt	5	576	September 25, 2020

Inference time on jetson nano

The problem is the inference time: the model takes around 60-90 seconds to predict a single image.

How can I optimize the code or the model to bring down the inference time?

What is the maximum model size that can be run on a Jetson Nano 2GB with a reasonable inference time?

Related topics