TensorRT: input_1: dynamic input is missing dimensions in profile 0

I created and trained a neural network in Python, converted it to ONNX, and am now trying to run it with TensorRT in C++. My C++ code is below (it is based on the code in How To Run Inference Using TensorRT C++ API | LearnOpenCV).

#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core.hpp>
#include <algorithm>
#include <numeric>

class Logger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char* msg) override {
        // remove this 'if' if you need more logged info
        if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR)) {
            std::cout << msg << "\n";
        }
    }
} gLogger;

struct TRTDestroy
{
    template <class T>
    void operator()(T* obj) const
    {
        if (obj)
        {
            obj->destroy();
        }
    }
};

template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;

size_t getSizeByDim(const nvinfer1::Dims& dims)
{
    size_t size = 1;
    for (int32_t i = 0; i < dims.nbDims; ++i)
    {
        size *= dims.d[i];
    }
    return size;
}

std::vector<std::string> getClassNames(const std::string& imagenet_classes)
{
    std::ifstream classes_file(imagenet_classes);
    std::vector<std::string> classes;
    if (!classes_file.good())
    {
        std::cerr << "ERROR: can't read file with classes names.\n";
        return classes;
    }
    std::string class_name;
    while (std::getline(classes_file, class_name))
    {
        classes.push_back(class_name);
    }
    return classes;
}

void preprocessImage(const std::string& image_path, float* gpu_input, const nvinfer1::Dims& dims)
{
    // The network is assumed to take a single-channel (grayscale) image, so read it as such.
    cv::Mat frame = cv::imread(image_path, cv::IMREAD_GRAYSCALE);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load failed\n";
        return;
    }
    cv::cuda::GpuMat gpu_frame;
    gpu_frame.upload(frame);
    // NOTE: these indices assume the binding reports (height, width) in d[0] and d[1];
    // adjust them to match the actual layout of your input binding.
    auto input_width = dims.d[1];
    auto input_height = dims.d[0];
    auto input_size = cv::Size(input_width, input_height);
    // No resize is performed: the image is assumed to already match the network input size.
    cv::cuda::GpuMat resized = gpu_frame;
    // Normalize to [0, 1].
    cv::cuda::GpuMat flt_image;
    resized.convertTo(flt_image, CV_32FC1, 1.f / 255.f);
    // Copy the normalized image directly into the TensorRT input buffer on the GPU.
    // (A multi-channel model would instead use cv::cuda::split into one plane per channel here.)
    cv::cuda::GpuMat input_plane(input_size, CV_32FC1, gpu_input);
    flt_image.copyTo(input_plane);
}

void postprocessResults(float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);

    // NOTE: these indices assume the binding reports (height, width) in d[0] and d[1].
    auto cols = dims.d[1];
    auto rows = dims.d[0];
    cv::Mat Finalmat = cv::Mat(rows, cols, CV_32FC1);
    memcpy(Finalmat.data, cpu_output.data(), cpu_output.size() * sizeof(float));
    // The output is single-channel, so convert to 8-bit single-channel for writing.
    Finalmat.convertTo(Finalmat, CV_8UC1, 255.0);
    cv::imwrite("/tensorRT.bmp", Finalmat);
}

void parseOnnxModel(const std::string& model_path, TRTUniquePtr<nvinfer1::ICudaEngine>& engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext>& context)
{
    TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    TRTUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(explicitBatch)};
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
    
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
    {
        std::cerr << "ERROR: could not parse the model.\n";
        return;
    }

    //create Config to configure engine parameters such as max memory or set FP16 mode
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};

    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims3(3,100,200));
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims3(3,1024,1024));
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims3(3,4096,4096));

    config->addOptimizationProfile(profile);
    // allow TensorRT to use up to 1GB of GPU memory for tactic selection.
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    // we have only one image in batch
    builder->setMaxBatchSize(1);
    // generate TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network, *config));
    context.reset(engine->createExecutionContext());
}

// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char* argv[])
{
    if (argc < 3)
    {
        std::cerr << "usage: " << argv[0] << " [model_name].onnx [image_name].jpg\n";
        return -1;
    }
    std::string model_path(argv[1]);
    std::string image_path(argv[2]);
    int batch_size = 1;

    TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
    TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
    parseOnnxModel(model_path, engine, context);

    std::vector<nvinfer1::Dims> input_dims; // we expect only one input
    std::vector<nvinfer1::Dims> output_dims; // and one output

    std::vector<void*> buffers(engine->getNbBindings()); // buffers for input and output data
    for (int i = 0; i < engine->getNbBindings(); ++i)
    {
        auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
        cudaMalloc(&buffers[i], binding_size);
        if (engine->bindingIsInput(i))
        {
            input_dims.emplace_back(engine->getBindingDimensions(i));
        }
        else
        {
            output_dims.emplace_back(engine->getBindingDimensions(i));
        }
    }
    if (input_dims.empty() || output_dims.empty())
    {
        std::cerr << "Expect at least one input and one output for network\n";
        return -1;
    }

    // preprocess input data
    preprocessImage(image_path, (float *) buffers[0], input_dims[0]);
    // inference - "enqueue" asynchronously executes inference on a batch. 
    context->enqueue(batch_size, buffers.data(), 0, nullptr);
    // postprocess results
    postprocessResults((float *) buffers[1], output_dims[0], batch_size);


    for (void* buf : buffers)
    {
        cudaFree(buf);
    }
    return 0;
}

Operating System + Version: Ubuntu 18
TensorRT Version:
For training the NN and creating the .keras and ONNX files, I used the 19.10 Docker container, which has CUDA 10.1, because according to this source I needed CUDA 10.1 to use the GPU with TensorFlow: Build from source | TensorFlow

When I ran ./trt_sample unet.onnx testImage.bmp, I got this error:

----------------------------------------------------------------
Input filename:   unet.onnx
ONNX IR version:  0.0.7
Opset version:    12
Producer name:    keras2onnx
Producer version: 1.7.0
Domain:           onnxmltools
Model version:    0
Doc string:       
----------------------------------------------------------------

	WARNING: ONNX model has a newer ir_version (0.0.7) than this parser was built against (0.0.3).
While parsing node number 1 [Conv]:
ERROR: ModelImporter.cpp:296 In function importModel:
[5] Assertion failed: tensors.count(input_name)
ERROR: could not parse the model.
Segmentation fault (core dumped)

If I instead run the ONNX file and TensorRT with the 20.03 Docker container, which has CUDA 10.2 and TensorRT 7.0.0, I get this error:

	----------------------------------------------------------------
Input filename:   unet.onnx
ONNX IR version:  0.0.7
Opset version:    12
Producer name:    keras2onnx
Producer version: 1.7.0
Domain:           onnxmltools
Model version:    0
Doc string:       
----------------------------------------------------------------
input_1: dynamic input is missing dimensions in profile 0.
Network validation failed.
Segmentation fault (core dumped)
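
I notice the error mentions input_1, while in parseOnnxModel I set the profile dimensions for a tensor named "foo", and the Dims3 values don't include a batch dimension even though the network was created with kEXPLICIT_BATCH. Is something like the sketch below (replacing the profile block in parseOnnxModel) what is needed? The channel count and the min/opt/max height/width ranges here are only placeholders, not values from my model.

nvinfer1::ITensor* input = network->getInput(0);
const char* input_name = input->getName();  // reportedly "input_1" for this model
nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
// With an explicit-batch network, the profile must cover every dimension of the input,
// including the batch dimension (NCHW assumed here).
profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1, 1, 256, 256));
profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(1, 1, 1024, 1024));
profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(1, 1, 4096, 4096));
config->addOptimizationProfile(profile);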

Can anyone help?

Hi @mke489,
Could you please share the ONNX model file so we can help better?
Meanwhile, could you please try the trtexec command in verbose mode?
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec

Thanks

Hi @mke489,
I can see that your model has passed the conversion, so it doesn't look like there is any issue with your model.
There might be an issue with your script instead.
I will check and get back to you.
Thanks!

Hi @mke489,
Can you try using the trtexec command to run your model and see if that works?

Thanks!
Aakanksha

Hi @mke489,
trtexec is an alternative way of quickly generating a serialized engine without having to develop your own application.
From the trtexec logs, you can see that the model has passed.
So, alternatively, you can generate the engine file using the trtexec command.
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec

Thanks!

Hi @mke489,
Did you try running your model with the latest script?

You can use the command below with your model name and input shape to generate the engine/.trt file:
trtexec --onnx=your_model.onnx --verbose --explicitBatch --shapes=input_name:64x3x288x288 --saveEngine=engineName.engine
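
If you want to keep your own C++ application rather than trtexec, note that a dynamic-shape (explicit-batch) engine also needs the actual input shape set on the execution context before inference, and it is executed with enqueueV2 instead of enqueue. A rough sketch follows; binding index 0 and the 1x1x1024x1024 shape are placeholders, not values from your model.

// Sketch only: the binding index and the concrete shape below are assumptions.
context->setBindingDimensions(0, nvinfer1::Dims4(1, 1, 1024, 1024));
if (!context->allInputDimensionsSpecified())
{
    std::cerr << "Input dimensions are not fully specified\n";
}
// Explicit-batch engines are executed with enqueueV2 (no batch-size argument).
context->enqueueV2(buffers.data(), 0, nullptr);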

Thanks!

Hi @mke489,

After running this command, an engine file will be generated in the working directory, which you can use for inference.
Also, you will need to change the input shape of your model.
Please refer to the link below for details:
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec
Thanks!

Hi @mke489
I can see from the logs that you are able to run your model successfully using trtexec.
You can use --saveEngine to save your model for inference.
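
For example, the engine saved with --saveEngine can be deserialized in C++ roughly like this. This is only a sketch, reusing the gLogger and TRTUniquePtr helpers from your code above; engineName.engine is the file name from the earlier trtexec command.

// Sketch only: load the serialized engine produced by trtexec --saveEngine.
std::ifstream engine_file("engineName.engine", std::ios::binary | std::ios::ate);
const auto file_size = static_cast<std::size_t>(engine_file.tellg());
engine_file.seekg(0, std::ios::beg);
std::vector<char> engine_data(file_size);
engine_file.read(engine_data.data(), file_size);

TRTUniquePtr<nvinfer1::IRuntime> runtime{nvinfer1::createInferRuntime(gLogger)};
TRTUniquePtr<nvinfer1::ICudaEngine> engine{
    runtime->deserializeCudaEngine(engine_data.data(), engine_data.size(), nullptr)};
TRTUniquePtr<nvinfer1::IExecutionContext> context{engine->createExecutionContext()};
// From here, the buffer allocation and enqueue calls are the same as with an engine built from ONNX.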

Thanks!

Hi @mke489
You can refer to the link below to perform inference using C++.

Thanks

It doesn’t say anywhere how to perform inference with .trt files.