Description
enqueueV2 fails with "Error Code 3: API Usage Error ... condition: bindings != nullptr" when running inference with a TensorRT engine deserialized from an ONNX-converted plan file.
Environment
TensorRT Version: 8.2.3.0
GPU Type: 2070ti
Nvidia Driver Version:
CUDA Version: 10.2
CUDNN Version: 8.0
Operating System + Version: Windows 11
Python Version (if applicable): 3.6.7
TensorFlow Version (if applicable):
PyTorch Version (if applicable): 1.8.0
Baremetal or Container (if container which image + tag):
I followed the instructions for using TensorRT with an ONNX file for inference acceleration, but I hit this error:
“3: [executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueInternal::330] Error Code 3: API Usage Error (Parameter check failed at: executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueInternal::330, condition: bindings != nullptr)”
I cannot find any solution. What should I do? Here is my code:
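// Includes needed to compile this snippet (they were not shown in the original
// post); gLogger is assumed to be an ILogger implementation defined elsewhere.
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
using namespace cv;
using namespace nvinfer1;
using namespace std;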
std::string trtModelName = "D:/08_Work_file/21_face_parse/DDRNet/DDRNetMY2ONNX/C++/infer.trt";
Mat image = imread(".t/Image_Test11_1_aligned_smile.png", IMREAD_ANYCOLOR);
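// Suggested check (not in the original snippet): imread returns an empty Mat
// when the path is wrong, and the resize below would then throw.
if (image.empty()) {
    std::cerr << "failed to load image" << std::endl;
    return -1;
}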
Mat img2;
resize(image, img2, Size(512, 512));
cv::cvtColor(img2, img2, cv::COLOR_RGB2BGR);   // imread loads BGR; this swap yields RGB (COLOR_RGB2BGR and COLOR_BGR2RGB are the same permutation)
img2.convertTo(img2, CV_32F);
cout << "dims:" << img2.dims << endl;
cout << "rows:" << img2.rows << endl;
cout << "cols:" << img2.cols << endl;
cout << "channels:" << img2.channels() << endl;
cout << "type:" << img2.type() << endl;
system("pause");
// 4. Deserialize the model
IRuntime* runtime = createInferRuntime(gLogger);
std::string cached_path = trtModelName;
std::ifstream fin(cached_path, std::ios::in | std::ios::binary);
std::stringstream buffer;
buffer << fin.rdbuf();                          // reads the whole serialized engine at once
std::string cached_engine = buffer.str();
fin.close();
ICudaEngine* engine = runtime->deserializeCudaEngine(cached_engine.data(), cached_engine.size());   // the IPluginFactory overload was removed in TensorRT 8
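// Suggested check (not in the original snippet): deserialization returns
// nullptr on failure, e.g. when the plan file was built with a different
// TensorRT version than the one used at runtime.
if (engine == nullptr) {
    std::cerr << "engine deserialization failed" << std::endl;
    return -1;
}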
system("pause");
IExecutionContext *context = engine->createExecutionContext();
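// Sketch of a sanity check (assumes the TensorRT 8.x binding API): print each
// binding's index, name, and dimensions to confirm that buffers[0] really is
// the input and buffers[1] the output, and that the shapes match 1x3x512x512.
for (int i = 0; i < engine->getNbBindings(); ++i) {
    Dims d = engine->getBindingDimensions(i);
    std::cout << "binding " << i << " (" << engine->getBindingName(i)
              << (engine->bindingIsInput(i) ? ", input):" : ", output):");
    for (int j = 0; j < d.nbDims; ++j) std::cout << " " << d.d[j];
    std::cout << std::endl;
}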
void* buffers[2] = { nullptr, nullptr };        // buffers[0] = input binding, buffers[1] = output binding
int nBatchSize = 1;
int nOutputSize = 1;                            // one output channel: 1 x 512 x 512
cudaMalloc(&buffers[0], nBatchSize * 512 * 512 * 3 * sizeof(float));
cudaMalloc(&buffers[1], nBatchSize * nOutputSize * 512 * 512 * sizeof(float));
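// Suggested check (not in the original snippet): verify that both allocations
// succeeded. A failed cudaMalloc leaves the pointer null, and passing a null
// binding to enqueueV2 produces exactly the "bindings != nullptr" error above.
cudaError_t allocStatus = cudaGetLastError();
if (allocStatus != cudaSuccess || buffers[0] == nullptr || buffers[1] == nullptr) {
    std::cerr << "device allocation failed: " << cudaGetErrorString(allocStatus) << std::endl;
    return -1;
}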
cudaStream_t stream;
cudaStreamCreate(&stream);
void *data = malloc(nBatchSize * 512 * 512 * 3 * sizeof(float));
memcpy(data, img2.ptr<float>(0), 512 * 512 * 3 * sizeof(float));   // img2 is continuous after convertTo, so one memcpy copies the whole HWC image
system("pause");
cudaMemcpyAsync(buffers[0], data, nBatchSize * 512 * 512 * 3 * sizeof(float), cudaMemcpyHostToDevice, stream);
std::cout << "start to infer image..." << std::endl;
bool ok = context->enqueueV2(buffers, stream, nullptr);   // returns false on failure
if (!ok) std::cerr << "enqueueV2 failed" << std::endl;
std::vector<float> prob(nBatchSize * nOutputSize * 512 * 512);   // heap storage instead of a ~1 MB stack array
cudaMemcpyAsync(prob.data(), buffers[1], nBatchSize * nOutputSize * 512 * 512 * sizeof(float), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);
std::cout << "image inference finished!" << std::endl;