Description
I am trying to build an optimized engine for a network. I run the same code several times to build the engine and measure the average inference time. The average inference time varies from run to run, which indicates that engines built with the same builder configuration perform differently from one build to the next.
I have increased the values passed to setMinTimingIterations() and setAvgTimingIterations(), hoping that the engine construction would converge to a single optimized result, but this did not work. How can I obtain an optimized and reproducible engine with IBuilderConfig? I do not want to serialize the engine. Thank you!
Environment
TensorRT Version: 8.0.1.6
GPU Type: 1080ti
Nvidia Driver Version:
CUDA Version:
CUDNN Version:
Operating System + Version:
Python Version (if applicable):
TensorFlow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if container which image + tag):
Relevant Files
Below is my code to set the profiler and the config.
/// @brief Parses the ONNX model and prepares the builder configuration.
///
/// Parses "resnet.onnx" (looked up via locateFile() with "./models" as the
/// search directory), records the first network input's dimensions and name
/// in `input_dims_s1` / `input_tensor_names_`, and registers an optimization
/// profile whose min/opt/max shapes are all the input shape with d[0]
/// replaced by `batch_size_s1_`. Finally sets the workspace size and the
/// timing-iteration counts on `config`.
///
/// @param builder Builder used to create the optimization profile.
/// @param network Network definition that is queried for its first input
///                (presumably the one the parser was created for — confirm
///                against the caller).
/// @param config  Builder configuration that receives the profile and the
///                workspace/timing settings.
/// @param parser  ONNX parser used to load the model file.
/// @return true on success; false if parsing fails, the network has no
///         input 0, the profile cannot be created, or TensorRT rejects the
///         profile dimensions or the profile itself.
bool Profiler::construct_s1(
    TRTUniquePtr<nvinfer1::IBuilder>& builder,
    TRTUniquePtr<nvinfer1::INetworkDefinition>& network,
    TRTUniquePtr<nvinfer1::IBuilderConfig>& config,
    TRTUniquePtr<nvonnxparser::IParser>& parser)
{
    samplesCommon::OnnxSampleParams params;
    params.dataDirs.emplace_back("./models");
    const bool parsed = parser->parseFromFile(
        locateFile("resnet.onnx", params.dataDirs).c_str(),
        static_cast<int>(sample::gLogger.getReportableSeverity()));
    if (!parsed) {
        return false;
    }

    // getInput() yields nullptr when the index is out of range, i.e. when the
    // parsed network exposes no inputs; do not dereference it blindly.
    auto* input = network->getInput(0);
    if (input == nullptr) {
        return false;
    }
    input_dims_s1 = input->getDimensions();
    input_tensor_names_ = input->getName();

    auto* profile = builder->createOptimizationProfile();
    if (profile == nullptr) {
        return false;
    }

    // min == opt == max: the profile describes one fixed shape, so a single
    // Dims value serves all three selectors.
    nvinfer1::Dims fixed_dims = input_dims_s1;
    fixed_dims.d[0] = batch_size_s1_;

    const char* input_name = input_tensor_names_.c_str();
    const bool dims_ok =
        profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMIN, fixed_dims) &&
        profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kOPT, fixed_dims) &&
        profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMAX, fixed_dims);
    if (!dims_ok) {
        return false;
    }

    // addOptimizationProfile() reports an invalid profile with a negative
    // index; treat that as a construction failure instead of ignoring it.
    if (config->addOptimizationProfile(profile) < 0) {
        return false;
    }

    config->setMaxWorkspaceSize(3_GiB);
    // NOTE(review): these only control how many timing runs the builder
    // performs per candidate; presumably they reduce, but do not remove,
    // build-to-build variation in the selected tactics — confirm against the
    // TensorRT documentation.
    config->setMinTimingIterations(5);
    config->setAvgTimingIterations(5);
    return true;
}