Nobody has answered my post: https://devtalk.nvidia.com/default/topic/1070696/tensorrt/error-run-2-context-parallel-in-tensorrt7/post/5427317/#5427317
Error output after running for about 1 minute:
...
[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...
[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...
[E] [TRT] FAILED_EXECUTION: std::exception
error when run graph TensorRT
[E] [TRT] FAILED_EXECUTION: std::exception
error when run graph TensorRT
0
1
0
1
0
1
[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...
[E] [TRT] FAILED_EXECUTION: std::exception
error when run graph TensorRT
[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...
[E] [TRT] FAILED_EXECUTION: std::exception
error when run graph TensorRT
0
1
double free or corruption (!prev)
[F] [TRT] Assertion failed: *refCount > 0
../rtSafe/WeightsPtr.cpp:20
Aborting...
[E] Aborted (core dumped)
test code:
#include "NvInfer.h"
#include <iostream>
#include "NvUtils.h"
#include "NvOnnxParser.h"
using namespace nvinfer1;
#include <thread>
#include "common/logger.h"
#include "common/buffers.h"
// Path of the ONNX model to build the engine from (resolved relative to CWD).
std::string model_path = "detection_model.onnx";
// Copy the first nbDims extents of a TensorRT Dims into a plain int vector,
// replacing any previous contents of v.
void convert_dims_to_vect(const nvinfer1::Dims& dims, std::vector<int>& v){
    v.assign(dims.d, dims.d + dims.nbDims);
}
// For each tensor name, look up its binding on this context's optimization
// profile and record the currently-set binding dimensions as an int vector.
void make_explicit_shapes(IExecutionContext* context,const std::vector<std::string>& tensorNames, std::vector<std::vector<int>>& explicit_shapes){
    // TensorRT suffixes bindings of profiles other than 0 with " [profile N]".
    const int profile = context->getOptimizationProfile();
    std::string suffix;
    if (profile != 0)
        suffix = " [profile " + std::to_string(profile) + "]";
    explicit_shapes.clear();
    explicit_shapes.reserve(tensorNames.size());
    for (const auto& name : tensorNames){
        const int binding = context->getEngine().getBindingIndex((name + suffix).c_str());
        std::vector<int> shape;
        convert_dims_to_vect(context->getBindingDimensions(binding), shape);
        explicit_shapes.push_back(std::move(shape));
    }
}
// Name of the network's input tensor (TensorFlow-style ":0" suffix from export).
std::string input_name = "fts_input_images:0";
// Min / opt / max input shapes for the optimization profiles (NHWC — TODO confirm against model).
Dims4 dims1(1,10,10,3);
Dims4 dims2(1,80,80,3);
Dims4 dims3(1,500,500,3);
// Worker body for one execution context.
//
// Allocates one host/device buffer pair per tensor shape, builds the binding
// table for profile slot `i`, and performs a single enqueueV2() on a private
// CUDA stream.
//
// NOTE(review): in a multi-profile engine every profile owns a distinct range
// of binding indices, so profile i's device pointers must sit after
// i * bindingsPerProfile entries; the leading slots are padded with nullptr
// because enqueueV2 never dereferences another profile's slots.
//
// @param contexts        one IExecutionContext per optimization profile
// @param i               index of the context/profile this thread drives
// @param explicit_shapes resolved binding shapes (one per tensor, in binding order)
void run(std::vector<IExecutionContext*> contexts, int i, std::vector<std::vector<int>> explicit_shapes){
    std::vector<samplesCommon::DeviceBuffer> deviceBuffers;
    std::vector<samplesCommon::HostBuffer> hostBuffers;
    // Renamed loop variable: the original `int i` here shadowed the parameter `i`.
    for (size_t t = 0; t < explicit_shapes.size(); ++t){
        // volume * sizeof(float); accumulate over size_t to avoid int overflow
        const size_t allocationSize =
            std::accumulate(explicit_shapes[t].begin(), explicit_shapes[t].end(),
                            size_t{1}, std::multiplies<size_t>()) * 4;
        hostBuffers.emplace_back(allocationSize);
        deviceBuffers.emplace_back(allocationSize);
    }
    // Pad the binding slots belonging to profiles 0..i-1, then append ours.
    std::vector<void*> mDeviceBindings(static_cast<size_t>(i) * deviceBuffers.size(), nullptr);
    for (auto& buffer : deviceBuffers){
        mDeviceBindings.emplace_back(buffer.data());
    }
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    if (!contexts[i]->enqueueV2(mDeviceBindings.data(), stream, nullptr)){
        std::cout<<"error when run graph TensorRT\n";
    }
    // Check these too — the original silently discarded their error codes.
    CHECK(cudaStreamSynchronize(stream));
    CHECK(cudaStreamDestroy(stream));
}
// Repro: build one engine with two identical optimization profiles, create one
// execution context per profile, then run both contexts concurrently forever.
int main(int argc, char** argv) {
    auto builder = createInferBuilder(gLogger);
    auto config = builder->createBuilderConfig();
    // Two identical profiles so two contexts can be enqueued in parallel.
    for (int i=0; i<2; ++i){
        auto profile = builder->createOptimizationProfile();
        profile->setDimensions(input_name.c_str(), OptProfileSelector::kMIN, dims1);
        profile->setDimensions(input_name.c_str(), OptProfileSelector::kOPT, dims2);
        profile->setDimensions(input_name.c_str(), OptProfileSelector::kMAX, dims3);
        config->addOptimizationProfile(profile);
    }
    auto network = builder->createNetworkV2(1U << static_cast<int>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
    auto parser = nvonnxparser::createParser(*network, gLogger);
    // Fail fast: the original ignored this result and crashed later on a null engine.
    if (!parser->parseFromFile(model_path.c_str(), 3)){
        std::cout<<"failed to parse ONNX model: "<<model_path<<"\n";
        return 1;
    }
    auto engine = builder->buildEngineWithConfig(*network,*config);
    if (engine == nullptr){
        std::cout<<"failed to build TensorRT engine\n";
        return 1;
    }
    // Keep only the un-suffixed (profile-0) tensor names; other profiles' bindings
    // carry a " [profile N]" suffix.
    std::vector<std::string> tensorNames;
    for (int i=0; i<engine->getNbBindings(); ++i){
        std::string name(engine->getBindingName(i));
        // npos, not -1: find() returns size_t (the original relied on implicit conversion).
        if (name.find("[profile")==std::string::npos){
            tensorNames.emplace_back(name);
        }
    }
    std::vector<IExecutionContext*> contexts;
    std::vector<std::vector<int>> explicit_shapes;
    for (int i=0; i<2; ++i){
        contexts.emplace_back(engine->createExecutionContext());
        auto context = contexts.back();
        context->setOptimizationProfile(i);
        std::cout<<"allInputDimensionsSpecified: "<<context->allInputDimensionsSpecified()<<"\n";
        int index;
        if (i==0)
            index = engine->getBindingIndex((input_name).c_str());
        else
            index = engine->getBindingIndex((input_name+" [profile "+std::to_string(i)+"]").c_str());
        context->setBindingDimensions(index, dims2);
        std::cout<<"allInputDimensionsSpecified must equal 1: "<<context->allInputDimensionsSpecified()<<"\n";
        // Only the last profile's shapes survive; both profiles use dims2, so the
        // shapes are identical — NOTE(review): confirm if profiles ever diverge.
        explicit_shapes.clear();
        make_explicit_shapes(context, tensorNames, explicit_shapes);
    }
    // Stress loop: two threads, one per context, forever.
    for (;;){
        std::vector<std::thread> v_thread;
        for (int i=0;i<2;++i){
            v_thread.emplace_back(run, contexts, i, explicit_shapes);
            std::cout<<i<<"\n";
        }
        for (auto p=v_thread.begin(); p!=v_thread.end(); p++)
            p->join();
    }
}
model: Microsoft OneDrive - Access files anywhere. Create docs with free Office Online.
I am using TensorRT 7 + driver 440.48.02 + GTX 1080 + Ubuntu 18.04.
I have tested with many versions of CUDA and cuDNN.