I use TensorRT 4 to do inference. I load the engine into memory, then multiple threads do inference via a function like this:
int TensorRTEngine::doInference(float *input, float *output,int batchSize) {
int64_t start=getCurrentTime();
IExecutionContext *context=engine->createExecutionContext();
int64_t end=getCurrentTime();
EngineError("create context cost %d",(end-start));
// EngineError("maxBatchSize:%d",engine->getMaxBatchSize());
// input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
// of these, but in this case we know that there is exactly one input and one output.
assert(engine->getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// note that indices are guaranteed to be less than IEngine::getNbBindings()
int inputIndex = engine->getBindingIndex(inputBlobName.c_str()),
outputIndex = engine->getBindingIndex(outPutBlobName.c_str());
size_t inputSize = batchSize * getInputSize() * sizeof(float);
size_t outputSize = batchSize * getOutPutSize() * sizeof(float);
// create GPU buffers and a stream
CHECK(cudaMalloc(&buffers[inputIndex], inputSize));
CHECK(cudaMalloc(&buffers[outputIndex], outputSize));
//context.setProfiler(&gProfiler);
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// DMA the input to the GPU, execute the batch asynchronously, and DMA it back:
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputSize, cudaMemcpyHostToDevice, stream));
context->enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputSize, cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// release the stream and the buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
if(context){
context->destroy();
}
return 0;
}
Every function call creates a new IExecutionContext from the engine, but I find that when many threads call this function, sometimes the result is not correct. Is TensorRT thread-safe?