:nvinfer1::rt::ExecutionContext::enqueueInternal::330, condition: bindings[x] != nullptr

Description

TensorRT inference with enqueueV2() fails with "Error Code 3: API Usage Error ... condition: bindings[x] != nullptr" when running a deserialized engine from C++.

Environment

TensorRT Version: 8.2.3.0
GPU Type: 2070ti
Nvidia Driver Version:
CUDA Version: 10.2
CUDNN Version: 8.0
Operating System + Version: Windows 11
Python Version (if applicable): 3.6.7
TensorFlow Version (if applicable):
PyTorch Version (if applicable): 1.8.0
Baremetal or Container (if container which image + tag):

I followed the instructions to use TensorRT with an ONNX file for inference acceleration, but I ran into the following error:

3: [executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueInternal::330] Error Code 3: API Usage Error (Parameter check failed at: executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueInternal::330, condition: bindings[x] != nullptr)

I cannot find any solution. What should I do? Here is my code:

std::string trtModelName = "D:/08_Work_file/21_face_parse/DDRNet/DDRNetMY2ONNX/C++/infer.trt";


Mat image = imread(".t/Image_Test11_1_aligned_smile.png", IMREAD_ANYCOLOR);


Mat img2;
resize(image, img2, Size(512, 512));
cv::cvtColor(img2, img2, cv::COLOR_BGR2RGB);  // imread loads BGR; swap the channel order to RGB
img2.convertTo(img2, CV_32F);
cout << "dims:" << img2.dims << endl;
cout << "rows:" << img2.rows << endl;
cout << "cols:" << img2.cols << endl;
cout << "channels:" << img2.channels() << endl;
cout << "type:" << img2.type() << endl;
system("pause");
    // 4. deserialize the engine from the cached .trt file
IRuntime* runtime = createInferRuntime(gLogger);
std::string cached_path = trtModelName;
std::ifstream fin(cached_path, std::ios::in | std::ios::binary);
std::stringstream buffer;
buffer << fin.rdbuf();
std::string cached_engine = buffer.str();
fin.close();

ICudaEngine* engine = runtime->deserializeCudaEngine(cached_engine.data(), cached_engine.size(), nullptr);

system("pause");

IExecutionContext *context = engine->createExecutionContext();

// device buffers for enqueueV2: assumes the engine has exactly two bindings (0 = input, 1 = output)
void* buffers[2] = { nullptr, nullptr };
int nBatchSize = 1;
int nOutputSize = 1;
cudaMalloc(&buffers[0], nBatchSize * 512 * 512 * 3 * sizeof(float));
cudaMalloc(&buffers[1], nBatchSize * nOutputSize * 512 * 512 * sizeof(float));

cudaStream_t stream;
cudaStreamCreate(&stream);
void *data = malloc(nBatchSize * 512 * 512 * 3 * sizeof(float));
memcpy(data, img2.ptr<float>(0), 512 * 512 * 3 * sizeof(float));
system("pause");


cudaMemcpyAsync(buffers[0], data, nBatchSize * 512 * 512 * 3 * sizeof(float), cudaMemcpyHostToDevice, stream);
std::cout << "start to infer image..." << std::endl;

// enqueueV2 requires one non-null device pointer per engine binding
context->enqueueV2(buffers, stream, nullptr);

std::vector<float> prob(nBatchSize * nOutputSize * 512 * 512);  // host output buffer (heap allocation avoids a ~1 MB stack array)
cudaMemcpyAsync(prob.data(), buffers[1], nBatchSize * nOutputSize * 512 * 512 * sizeof(float), cudaMemcpyDeviceToHost, stream);

cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);

std::cout << "image inference finished!" << std::endl;

Hi,
Please share the ONNX model and the script, if not already shared, so that we can assist you better.
Alongside, you can try a few things:

  1. Validate your model with the snippet below:

check_model.py

import sys
import onnx
filename = "yourONNXmodel.onnx"  # path to your ONNX model
model = onnx.load(filename)
onnx.checker.check_model(model)
  2. Try running your model with the trtexec command, for example as shown below.
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec
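A typical invocation looks like the following (the model filename here is just a placeholder):

trtexec --onnx=your_model.onnx --verbose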
If you are still facing the issue, please share the trtexec --verbose log for further debugging.
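Also, the failing condition (bindings[x] != nullptr) is checked for every binding index of the engine, so it usually indicates that the array passed to enqueueV2 does not provide a non-null device pointer for each binding. A minimal sketch for dumping what the engine actually expects, assuming the engine object deserialized in the code above:

int nbBindings = engine->getNbBindings();
std::cout << "engine expects " << nbBindings << " bindings" << std::endl;
for (int i = 0; i < nbBindings; ++i) {
	// print direction, name and dimensions of every binding; if more than two
	// bindings are reported, the two-element buffers array is not sufficient
	nvinfer1::Dims dims = engine->getBindingDimensions(i);
	std::cout << (engine->bindingIsInput(i) ? "input " : "output ") << i
		<< " \"" << engine->getBindingName(i) << "\" dims:";
	for (int d = 0; d < dims.nbDims; ++d)
		std::cout << " " << dims.d[d];
	std::cout << std::endl;
}

If any dimension prints as -1, the engine was built with dynamic shapes, and context->setBindingDimensions() has to be called for the input binding before enqueueV2().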
Thanks!