Description
I want to run model inference in parallel on the NVIDIA Xavier NX platform. I am trying to create multiple engines on different DLA cores and create multiple execution contexts to run inference.
- Could you give me some guidance about parallel model inference on this device?
Environment
TensorRT Version: 7.1.3
GPU Type: Jetson Xavier NX integrated GPU (Volta)
Nvidia Driver Version:
CUDA Version: 10.2
CUDNN Version:
Operating System + Version:
Python Version (if applicable):
TensorFlow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if container which image + tag):
Relevant Files
- create multiple engines
bool TensorrtExecutor::constructNetwork(UniquePtr<nvinfer1::IBuilder>& builder,
                                        UniquePtr<nvinfer1::INetworkDefinition>& network,
                                        UniquePtr<nvinfer1::IBuilderConfig>& config,
                                        UniquePtr<nvonnxparser::IParser>& parser)
{
    // Parse the ONNX model into the network definition.
    auto parsed = parser->parseFromFile(Iparams.ParamsOnnx.onnxFileName.c_str(),
                                        static_cast<int>(bench::gLogger.getReportableSeverity()));
    if (!parsed)
    {
        std::cerr << "parse model error" << std::endl;
        return false;
    }

    config->setMaxWorkspaceSize(4096_MiB);
    if (Iparams.ParamsOnnx.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (Iparams.ParamsOnnx.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        bench::setAllTensorScales(network.get(), 127.0f, 127.0f);
    }

    // Build one engine per DLA core, up to num_threads engines.
    int count = 0;
    for (int i = builder->getNbDLACores(); i > 0 && count < this->num_threads; i--)
    {
        std::cout << "Using DLA core " << (i - 1) << std::endl;
        if (i - 1 < 0)
        {
            // Intended as a GPU fallback, but unreachable given the loop condition (i > 0).
            config->setDefaultDeviceType(nvinfer1::DeviceType::kGPU);
            config->setDLACore(i - 1);
        }
        else
        {
            bench::enableDLA(builder.get(), config.get(), i - 1);
            Iengine = std::shared_ptr<nvinfer1::ICudaEngine>(
                builder->buildEngineWithConfig(*network, *config), bench::InferDeleter());
            if (!Iengine)
            {
                std::cout << "The engine create failed." << std::endl;
                // return false;
            }
            else
            {
                Iengines.push_back(Iengine);
            }
            count++;
        }
    }
    return true;
}
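- bench::enableDLA is not shown above; below is a minimal sketch of the DLA setup such a helper is assumed to perform on TensorRT 7.1 (modeled on samplesCommon::enableDLA from the TensorRT samples; the helper name and signature are illustrative, not the exact implementation):

// Hedged sketch of a DLA-enable helper; name/signature are illustrative only.
void enableDLASketch(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                     int dlaCore, bool allowGPUFallback = true)
{
    if (dlaCore < 0 || builder->getNbDLACores() == 0)
    {
        return; // no DLA requested or available, keep the default GPU device
    }
    if (allowGPUFallback)
    {
        // Layers DLA cannot run fall back to the GPU instead of failing the build.
        config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
    }
    if (!config->getFlag(nvinfer1::BuilderFlag::kINT8))
    {
        // DLA requires FP16 or INT8 precision.
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
    config->setDLACore(dlaCore);
}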
- create multiple contexts
// Create one execution context per thread, cycling through the engines round-robin.
for (int i = 0; i < this->num_threads; i++)
{
    std::cout << "the size of Iengines is " << Iengines.size() << std::endl;
    int k = i % Iengines.size();
    std::cout << "the no." << k << " Engine." << std::endl;
    auto context = UniquePtr<nvinfer1::IExecutionContext>(Iengines[k]->createExecutionContext());
    // auto context = UniquePtr<nvinfer1::IExecutionContext>(Iengine->createExecutionContext());
    if (!context)
    {
        std::cout << "can not create context" << std::endl;
    }
    else
    {
        Icontext.push_back(std::move(context));
        status.SetStatus_inference(Status(Status::SUCCESS));
        status.SetStatus_inference_asic(Status(Status::SUCCESS));
    }
}
- Then I create multiple threads to run inference on the same model with different pictures in parallel (see the sketch below).
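- A minimal sketch of that per-thread inference step, assuming one execution context and one CUDA stream per thread (buffer allocation and pre/post-processing are omitted; inferWorker and bindingsForThread are illustrative names, not part of my actual code):

#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <thread>
#include <vector>

// Illustrative per-thread worker: each thread owns one execution context and one CUDA
// stream so the engines/contexts can run concurrently. `bindings` must point to
// pre-allocated device buffers in the engine's binding order (allocation not shown).
void inferWorker(nvinfer1::IExecutionContext* context, void** bindings, int iterations)
{
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    for (int n = 0; n < iterations; ++n)
    {
        // Copy the next picture into the input binding with cudaMemcpyAsync(..., stream) here.
        context->enqueueV2(bindings, stream, nullptr);
        // Copy results back with cudaMemcpyAsync(..., stream) here.
        cudaStreamSynchronize(stream);
    }
    cudaStreamDestroy(stream);
}

// Launch one worker per context created above; Icontext[i] pairs with its own buffers.
// std::vector<std::thread> workers;
// for (size_t i = 0; i < Icontext.size(); ++i)
//     workers.emplace_back(inferWorker, Icontext[i].get(), bindingsForThread[i], numIterations);
// for (auto& t : workers) t.join();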
Steps To Reproduce
Please include:
- Exact steps/commands to build your repro
- Exact steps/commands to run your repro
- Full traceback of errors encountered