Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU)
• DeepStream Version
• JetPack Version (valid for Jetson only)
• TensorRT Version
• NVIDIA GPU Driver Version (valid for GPU only)
• Issue Type (questions)
I am testing the DLA functionality in order to make use of its power on edge devices.
From the reference below, we understood that the DLA should increase throughput when running inference.
We tested this by taking the ONNX version of yolov8s and running trtexec to generate the engine file in the two ways below, to compare the model running with and without DLA:
trtexec --onnx=onnx_models/yolov8s.onnx --saveEngine=yolov8s.engine --exportProfile=model_gn.json --int8 --useDLACore=0 --allowGPUFallback --useSpinWait --separateProfileRun > model_gn.log
trtexec --onnx=onnx_models/yolov8s.onnx --saveEngine=yolov8s.engine --int8 > model_gn.log
While each command was running, we checked the throughput, with the following results:
Engine with DLA throughput : ~ 140 qps
Engine without DLA throughput : ~ 201 qps
As we understood it, this should be the other way around if the DLA performs better on these resources.
Below I have attached the profile generated for the model with DLA, for reference.
[
{ "count" : 433 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to {ForeignNode[/0/model.0/conv/Conv.../0/model.22/Concat_2]}", "timeMs" : 47.0022, "averageMs" : 0.10855, "medianMs" : 0.108544, "percentage" : 1.5176 }
, { "name" : "{ForeignNode[/0/model.0/conv/Conv.../0/model.22/Concat_2]}", "timeMs" : 2543.14, "averageMs" : 5.87329, "medianMs" : 5.86982, "percentage" : 82.1123 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to /0/model.22/Reshape", "timeMs" : 37.5533, "averageMs" : 0.0867282, "medianMs" : 0.086688, "percentage" : 1.21251 }
, { "name" : "/0/model.22/Reshape", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_copy_output", "timeMs" : 31.9359, "averageMs" : 0.073755, "medianMs" : 0.073696, "percentage" : 1.03114 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to /0/model.22/Reshape_1", "timeMs" : 13.1777, "averageMs" : 0.0304335, "medianMs" : 0.030432, "percentage" : 0.42548 }
, { "name" : "/0/model.22/Reshape_1", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_1_copy_output", "timeMs" : 9.47417, "averageMs" : 0.0218803, "medianMs" : 0.021888, "percentage" : 0.3059 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to /0/model.22/Reshape_2", "timeMs" : 6.68272, "averageMs" : 0.0154335, "medianMs" : 0.015456, "percentage" : 0.21577 }
, { "name" : "/0/model.22/Reshape_2", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_2_copy_output", "timeMs" : 3.87344, "averageMs" : 0.00894559, "medianMs" : 0.009024, "percentage" : 0.125065 }
, { "name" : "/0/model.22/dfl/Reshape + /0/model.22/dfl/Transpose", "timeMs" : 37.558, "averageMs" : 0.0867391, "medianMs" : 0.08672, "percentage" : 1.21267 }
, { "name" : "/0/model.22/dfl/Softmax", "timeMs" : 30.5957, "averageMs" : 0.0706599, "medianMs" : 0.070624, "percentage" : 0.987869 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to /0/model.22/dfl/Softmax", "timeMs" : 14.4346, "averageMs" : 0.0333362, "medianMs" : 0.033312, "percentage" : 0.466061 }
, { "name" : "/0/model.22/dfl/conv/Conv", "timeMs" : 20.9888, "averageMs" : 0.0484729, "medianMs" : 0.04848, "percentage" : 0.677681 }
, { "name" : "/0/model.22/Constant_3_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_3_output_0_clone_1", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_3_output_0_clone_2", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_9_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_10_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 240) [Constant] + (Unnamed Layer* 241) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_16_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_17_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 254) [Constant] + (Unnamed Layer* 255) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_23_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Constant_24_output_0", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 268) [Constant] + (Unnamed Layer* 269) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Expand", "timeMs" : 3.42969, "averageMs" : 0.00792077, "medianMs" : 0.008, "percentage" : 0.110737 }
, { "name" : "/0/model.22/Expand_1", "timeMs" : 2.66739, "averageMs" : 0.00616026, "medianMs" : 0.006144, "percentage" : 0.0861242 }
, { "name" : "/0/model.22/Expand_2", "timeMs" : 2.8393, "averageMs" : 0.00655727, "medianMs" : 0.006624, "percentage" : 0.0916747 }
, { "name" : "/0/model.22/Expand_3", "timeMs" : 2.65347, "averageMs" : 0.00612811, "medianMs" : 0.006144, "percentage" : 0.0856748 }
, { "name" : "/0/model.22/Expand_4", "timeMs" : 2.83373, "averageMs" : 0.00654441, "medianMs" : 0.006688, "percentage" : 0.0914949 }
, { "name" : "/0/model.22/Expand_5", "timeMs" : 2.54567, "averageMs" : 0.00587914, "medianMs" : 0.006016, "percentage" : 0.082194 }
, { "name" : "/0/model.22/Unsqueeze_1", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Unsqueeze_1_copy_output", "timeMs" : 3.32502, "averageMs" : 0.00767904, "medianMs" : 0.007712, "percentage" : 0.107358 }
, { "name" : "/0/model.22/Unsqueeze", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 242) [Slice]", "timeMs" : 2.92838, "averageMs" : 0.00676301, "medianMs" : 0.006784, "percentage" : 0.0945511 }
, { "name" : "/0/model.22/Unsqueeze_3", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Unsqueeze_3_copy_output", "timeMs" : 2.76176, "averageMs" : 0.00637819, "medianMs" : 0.006336, "percentage" : 0.0891711 }
, { "name" : "/0/model.22/Unsqueeze_2", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 256) [Slice]", "timeMs" : 2.55299, "averageMs" : 0.00589606, "medianMs" : 0.005856, "percentage" : 0.0824306 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to (Unnamed Layer* 256) [Slice]", "timeMs" : 3.14115, "averageMs" : 0.00725439, "medianMs" : 0.007264, "percentage" : 0.101421 }
, { "name" : "/0/model.22/Unsqueeze_5", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Unsqueeze_5_copy_output", "timeMs" : 2.80368, "averageMs" : 0.00647502, "medianMs" : 0.0064, "percentage" : 0.0905248 }
, { "name" : "/0/model.22/Unsqueeze_4", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "(Unnamed Layer* 270) [Slice]", "timeMs" : 2.53015, "averageMs" : 0.00584329, "medianMs" : 0.005824, "percentage" : 0.0816929 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to (Unnamed Layer* 270) [Slice]", "timeMs" : 2.66938, "averageMs" : 0.00616484, "medianMs" : 0.006144, "percentage" : 0.0861883 }
, { "name" : "/0/model.22/Squeeze + (Unnamed Layer* 243) [Shuffle]_copy_input", "timeMs" : 2.69658, "averageMs" : 0.00622766, "medianMs" : 0.006208, "percentage" : 0.0870665 }
, { "name" : "/0/model.22/Squeeze + (Unnamed Layer* 243) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Squeeze_1 + (Unnamed Layer* 257) [Shuffle]_copy_input", "timeMs" : 2.44269, "averageMs" : 0.00564132, "medianMs" : 0.0056, "percentage" : 0.0788691 }
, { "name" : "/0/model.22/Squeeze_1 + (Unnamed Layer* 257) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Squeeze_2 + (Unnamed Layer* 271) [Shuffle]_copy_input", "timeMs" : 2.45511, "averageMs" : 0.00566999, "medianMs" : 0.005632, "percentage" : 0.07927 }
, { "name" : "/0/model.22/Squeeze_2 + (Unnamed Layer* 271) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Unsqueeze_output_0 copy", "timeMs" : 3.092, "averageMs" : 0.00714088, "medianMs" : 0.0072, "percentage" : 0.0998339 }
, { "name" : "/0/model.22/Unsqueeze_2_output_0 copy", "timeMs" : 2.79174, "averageMs" : 0.00644745, "medianMs" : 0.006368, "percentage" : 0.0901393 }
, { "name" : "/0/model.22/Unsqueeze_4_output_0 copy", "timeMs" : 2.75017, "averageMs" : 0.00635144, "medianMs" : 0.006272, "percentage" : 0.0887971 }
, { "name" : "/0/model.22/Reshape_3", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_3_copy_output", "timeMs" : 2.74454, "averageMs" : 0.00633844, "medianMs" : 0.006336, "percentage" : 0.0886153 }
, { "name" : "/0/model.22/Reshape_4", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_4_copy_output", "timeMs" : 2.4913, "averageMs" : 0.00575358, "medianMs" : 0.005728, "percentage" : 0.0804386 }
, { "name" : "/0/model.22/Reshape_5", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/0/model.22/Reshape_5_copy_output", "timeMs" : 2.43866, "averageMs" : 0.005632, "medianMs" : 0.0056, "percentage" : 0.0787389 }
, { "name" : "/0/model.22/Transpose + /0/model.22/Unsqueeze_6", "timeMs" : 3.4111, "averageMs" : 0.00787784, "medianMs" : 0.007808, "percentage" : 0.110137 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to /0/model.22/Transpose + /0/model.22/Unsqueeze_6", "timeMs" : 3.51277, "averageMs" : 0.00811263, "medianMs" : 0.008064, "percentage" : 0.11342 }
, { "name" : "/0/model.22/dfl/Reshape_1", "timeMs" : 6.9063, "averageMs" : 0.0159499, "medianMs" : 0.015904, "percentage" : 0.222989 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to PWN(/0/model.22/Add)", "timeMs" : 2.89895, "averageMs" : 0.00669502, "medianMs" : 0.006688, "percentage" : 0.0936006 }
, { "name" : "PWN(/0/model.22/Add)", "timeMs" : 3.14563, "averageMs" : 0.00726474, "medianMs" : 0.007264, "percentage" : 0.101566 }
, { "name" : "PWN(/0/model.22/Add_1)", "timeMs" : 2.91149, "averageMs" : 0.006724, "medianMs" : 0.00672, "percentage" : 0.0940057 }
, { "name" : "PWN(/0/model.22/Add_2)", "timeMs" : 2.89053, "averageMs" : 0.00667559, "medianMs" : 0.00672, "percentage" : 0.0933288 }
, { "name" : "/0/model.22/Sub", "timeMs" : 4.2896, "averageMs" : 0.00990669, "medianMs" : 0.00992, "percentage" : 0.138502 }
, { "name" : "PWN(/0/model.22/Add_4)", "timeMs" : 4.2704, "averageMs" : 0.00986235, "medianMs" : 0.009952, "percentage" : 0.137882 }
, { "name" : "/0/model.22/Sub_1", "timeMs" : 4.16672, "averageMs" : 0.00962292, "medianMs" : 0.009632, "percentage" : 0.134534 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to /0/model.22/Sub_1", "timeMs" : 4.08883, "averageMs" : 0.00944304, "medianMs" : 0.009408, "percentage" : 0.13202 }
, { "name" : "/0/model.22/Transpose_1 + (Unnamed Layer* 350) [Shuffle]", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "PWN(/0/model.22/Constant_36_output_0 + (Unnamed Layer* 346) [Shuffle], PWN(/0/model.22/Add_5, /0/model.22/Div_1))", "timeMs" : 6.02016, "averageMs" : 0.0139034, "medianMs" : 0.013856, "percentage" : 0.194378 }
, { "name" : "/0/model.22/Div_1_output_0 copy", "timeMs" : 3.96192, "averageMs" : 0.00914993, "medianMs" : 0.009152, "percentage" : 0.127922 }
, { "name" : "PWN(/0/model.22/Sigmoid)", "timeMs" : 27.1587, "averageMs" : 0.0627221, "medianMs" : 0.062688, "percentage" : 0.876894 }
, { "name" : "Reformatting CopyNode for Input Tensor 1 to /0/model.22/Mul_2", "timeMs" : 3.67088, "averageMs" : 0.00847778, "medianMs" : 0.008512, "percentage" : 0.118525 }
, { "name" : "/0/model.22/Mul_2", "timeMs" : 5.63786, "averageMs" : 0.0130205, "medianMs" : 0.013024, "percentage" : 0.182034 }
, { "name" : "/0/model.22/Mul_2_output_0 copy", "timeMs" : 3.21837, "averageMs" : 0.00743273, "medianMs" : 0.007424, "percentage" : 0.103914 }
, { "name" : "Reformatting CopyNode for Input Tensor 0 to /1/Transpose", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/1/Transpose", "timeMs" : 44.2564, "averageMs" : 0.102209, "medianMs" : 0.102144, "percentage" : 1.42894 }
, { "name" : "Reformatting CopyNode for Output Tensor 0 to /1/Transpose", "timeMs" : 0, "averageMs" : 0, "medianMs" : 0, "percentage" : 0 }
, { "name" : "/1/Slice", "timeMs" : 4.83623, "averageMs" : 0.0111691, "medianMs" : 0.011136, "percentage" : 0.156151 }
, { "name" : "/1/Slice_1", "timeMs" : 27.9762, "averageMs" : 0.0646101, "medianMs" : 0.064576, "percentage" : 0.90329 }
, { "name" : "/1/ReduceMax", "timeMs" : 32.2679, "averageMs" : 0.0745217, "medianMs" : 0.074432, "percentage" : 1.04186 }
, { "name" : "/1/ArgMax", "timeMs" : 32.8336, "averageMs" : 0.0758281, "medianMs" : 0.075776, "percentage" : 1.06012 }
, { "name" : "/1/Cast", "timeMs" : 2.81644, "averageMs" : 0.00650449, "medianMs" : 0.006496, "percentage" : 0.0909368 }
]
Please advise.