TensorRT: input_1: dynamic input is missing dimensions in profile 0

mke489 · November 23, 2020, 10:12pm

I trained a neural network in Python, converted it to ONNX, and am now trying to run it with TensorRT in C++. My C++ code is below (it is based on the code in How To Run Inference Using TensorRT C++ API | LearnOpenCV).

#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core.hpp>
#include <algorithm>
#include <numeric>

// TensorRT logger that forwards only error-level messages to standard output.
class Logger : public nvinfer1::ILogger
{
public:
    // Add more severities to the switch below to see more of TensorRT's log output.
    void log(Severity severity, const char* msg) override {
        switch (severity) {
        case Severity::kERROR:
        case Severity::kINTERNAL_ERROR:
            std::cout << msg << "\n";
            break;
        default:
            // everything below error level is dropped
            break;
        }
    }
} gLogger;

// Deleter that releases an object by calling its destroy() member.
// A null pointer is ignored.
struct TRTDestroy
{
    template <class T>
    void operator()(T* obj) const
    {
        if (obj == nullptr)
        {
            return;
        }
        obj->destroy();
    }
};

// std::unique_ptr flavour that uses TRTDestroy instead of delete.
template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;

// Returns the number of elements described by `dims`, i.e. the product of its
// extents (1 for a rank-0 shape).
//
// Returns 0 when the shape is not fully specified: a negative rank, or any
// negative extent (TensorRT reports dynamic dimensions as -1). Multiplying such
// a value into a size_t would wrap around to an enormous element count.
size_t getSizeByDim(const nvinfer1::Dims& dims)
{
    if (dims.nbDims < 0)
    {
        return 0;
    }
    size_t size = 1;
    // Signed index on purpose: nbDims is signed, and comparing it against a
    // size_t index would silently convert a negative rank to a huge bound.
    for (int i = 0; i < dims.nbDims; ++i)
    {
        const auto extent = dims.d[i];
        if (extent < 0)
        {
            return 0;
        }
        size *= static_cast<size_t>(extent);
    }
    return size;
}

// Reads the file at `imagenet_classes` and returns its lines, one entry per line,
// in file order. If the file cannot be opened, an error is written to std::cerr
// and an empty vector is returned.
std::vector<std::string> getClassNames(const std::string& imagenet_classes)
{
    std::vector<std::string> names;
    std::ifstream input(imagenet_classes);
    if (input.good())
    {
        for (std::string line; std::getline(input, line);)
        {
            names.push_back(line);
        }
    }
    else
    {
        std::cerr << "ERROR: can't read file with classes names.\n";
    }
    return names;
}

// Loads the image at `image_path`, uploads it to the GPU, converts it to float
// scaled by 1/255 and splits it into planes that wrap the caller's `gpu_input`
// buffer.
//
// image_path: file passed to cv::imread.
// gpu_input:  destination buffer; the planes built below point into it.
//             Presumably device memory sized for the input binding — verify
//             against the caller.
// dims:       input shape; d[0] is used as the height and d[1] as the width.
//
// If the image cannot be loaded, an error is written to std::cerr and the
// function returns without touching `gpu_input`; the caller is not told.
void preprocessImage(const std::string& image_path, float* gpu_input, const nvinfer1::Dims& dims)
{
    // NOTE(review): cv::imread with no flags presumably yields a 3-channel BGR
    // image, while the rest of this function assumes `channels` == 1 — confirm.
    cv::Mat frame = cv::imread(image_path);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load failed\n";
        return;
    }
    cv::cuda::GpuMat gpu_frame;
    gpu_frame.upload(frame);
    int channels=1;
    // NOTE(review): with an explicit-batch network d[0] may be the batch
    // dimension (and -1 when dynamic) rather than the height — confirm the layout.
    auto input_width = dims.d[1];
    auto input_height = dims.d[0];
    auto input_size = cv::Size(input_width, input_height);
    // No resize is performed here: `resized` is just the uploaded frame, so the
    // image is not scaled to `input_size`.
    cv::cuda::GpuMat resized=gpu_frame;
    cv::cuda::GpuMat flt_image;
    // NOTE(review): convertTo presumably changes only the depth and keeps the
    // source channel count, so CV_32FC1 may not produce a 1-channel image — confirm.
    resized.convertTo(flt_image, CV_32FC1, 1.f / 255.f);
    // One GpuMat header per channel, each viewing a consecutive
    // input_width * input_height slice of `gpu_input`.
    std::vector<cv::cuda::GpuMat> chw;
    for (size_t i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
    }
    // NOTE(review): the GpuMat variant is presumably cv::cuda::split (declared in
    // opencv2/cudaarithm.hpp, not included here) — confirm cv::split accepts these.
    cv::split(flt_image, chw);
}

// Copies the network output from the device, interprets it as a single-channel
// float image of dims.d[0] rows by dims.d[1] columns, scales it by 255 and
// writes it to "/tensorRT.bmp".
//
// gpu_output: device pointer to the output binding.
// dims:       output shape; must have at least two positive extents.
// batch_size: multiplier applied to the element count copied from the device.
//
// Every failure (bad shape, failed device copy, failed image write) is reported
// on std::cerr and the function returns without throwing.
void postprocessResults(float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
    if (dims.nbDims < 2 || dims.d[0] <= 0 || dims.d[1] <= 0)
    {
        std::cerr << "ERROR: output shape is not a fully specified 2D (or higher) tensor.\n";
        return;
    }

    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    const cudaError_t copy_status =
        cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
    if (copy_status != cudaSuccess)
    {
        std::cerr << "ERROR: copying the output from the device failed: "
                  << cudaGetErrorString(copy_status) << "\n";
        return;
    }

    const auto cols = dims.d[1];
    const auto rows = dims.d[0];
    cv::Mat Finalmat = cv::Mat::zeros(rows, cols, CV_32FC1);
    // The output tensor can hold more elements than rows * cols (extra
    // dimensions, batch_size > 1). Copy only what fits so the Mat's buffer is
    // never overrun; any shortfall stays zero.
    const size_t count = std::min(cpu_output.size(), Finalmat.total());
    std::copy_n(cpu_output.begin(), count, Finalmat.ptr<float>());

    // NOTE(review): convertTo keeps the source channel count, so CV_8UC3 here
    // presumably acts as plain 8-bit depth — confirm against the OpenCV docs.
    Finalmat.convertTo(Finalmat, CV_8UC3, 255.0);
    if (!cv::imwrite("/tensorRT.bmp", Finalmat))
    {
        std::cerr << "ERROR: could not write /tensorRT.bmp\n";
    }
}

void parseOnnxModel(const std::string& model_path, TRTUniquePtr<nvinfer1::ICudaEngine>& engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext>& context)
{
    nvinfer1::IBuilder *builder = nvinfer1::createInferBuilder(gLogger);
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(explicitBatch);
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
    
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
    {
        std::cerr << "ERROR: could not parse the model.\n";
        return;
    }

    //create Config to configure engine parameters such as max memory or set FP16 mode
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};

    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims3(3,100,200));
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims3(3,1024,1024));
    profile->setDimensions("foo", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims3(3,4096,4096));

    config->addOptimizationProfile(profile);
    // allow TensorRT to use up to 1GB of GPU memory for tactic selection.
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    // we have only one image in batch
    builder->setMaxBatchSize(1);
    // generate TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network, *config));
    context.reset(engine->createExecutionContext());
}

// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char* argv[])
    if (argc < 3)
    {
        std::cerr << "usage: " << argv[0] << " [model_name].onnx [image_name].jpg\n";
        return -1;
    }
    std::string model_path(argv[1]);
    std::string image_path(argv[2]);
    int batch_size = 1;

    TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
    TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
    parseOnnxModel(model_path, engine, context);

    std::vector<nvinfer1::Dims> input_dims; // we expect only one input
    std::vector<nvinfer1::Dims> output_dims; // and one output

    std::vector<void*> buffers(engine->getNbBindings()); // buffers for input and output data
    for (size_t i = 0; i < engine->getNbBindings(); ++i)
    {
        auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
        cudaMalloc(&buffers[i], binding_size);
        if (engine->bindingIsInput(i))
        {
            input_dims.emplace_back(engine->getBindingDimensions(i));
        }
        else
        {
            output_dims.emplace_back(engine->getBindingDimensions(i));
        }
    }
    if (input_dims.empty() || output_dims.empty())
    {
        std::cerr << "Expect at least one input and one output for network\n";
        return -1;
    }

    // preprocess input data
    preprocessImage(image_path, (float *) buffers[0], input_dims[0]);
    // inference - "enqueue" asynchronously executes inference on a batch. 
    context->enqueue(batch_size, buffers.data(), 0, nullptr);
    // postprocess results
    postprocessResults((float *) buffers[1], output_dims[0], batch_size);


    for (void* buf : buffers)
    {
        cudaFree(buf);
    }
    return 0;
}

Operating System + Version: Ubuntu 18
TensorRT Version:
For training the NN and creating the .keras and ONNX files, I used Docker container 19.10, which has CUDA 10.1, because according to this source I needed CUDA 10.1 in order to use the GPU with TF: 從原始碼開始建構 (Build from source) | TensorFlow

When I ran ./trt_sample unet.onnx testImage.bmp, I got this error:

----------------------------------------------------------------
Input filename:   unet.onnx
ONNX IR version:  0.0.7
Opset version:    12
Producer name:    keras2onnx
Producer version: 1.7.0
Domain:           onnxmltools
Model version:    0
Doc string:       
----------------------------------------------------------------

	WARNING: ONNX model has a newer ir_version (0.0.7) than this parser was built against (0.0.3).
While parsing node number 1 [Conv]:
ERROR: ModelImporter.cpp:296 In function importModel:
[5] Assertion failed: tensors.count(input_name)
ERROR: could not parse the model.
Segmentation fault (core dumped)

If I then tried to run the ONNX file and TensorRT with Docker container 20.03, which has CUDA 10.2 and TensorRT 7.0.0, then I get this error:

	----------------------------------------------------------------
Input filename:   unet.onnx
ONNX IR version:  0.0.7
Opset version:    12
Producer name:    keras2onnx
Producer version: 1.7.0
Domain:           onnxmltools
Model version:    0
Doc string:       
----------------------------------------------------------------
input_1: dynamic input is missing dimensions in profile 0.
Network validation failed.
Segmentation fault (core dumped)

can anyone help?

SunilJB · November 24, 2020, 6:48am

Hi @mke489,
Could you please share the onnx model file so we can help better.
Meanwhile, could you please try trtexec command in verbose mode?
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec

Thanks

AakankshaS · November 25, 2020, 5:36am

Hi @mke489,
I can see that your model passed the conversion, so there doesn't appear to be any issue with the model itself.
The issue may be in your script.
Will check and get back to you.
Thanks!

AakankshaS · November 30, 2020, 7:32pm

Hi @mke489,
Can you try using trtexec command to run your model and see if this works?

Thanks!
Aakanksha

AakankshaS · December 3, 2020, 5:15am

Hi @mke489,
trtexec is an alternative way of generating your serialized engine quickly, without having to develop your own application.
From the trtexec logs, you can see that the model has passed.
So, alternatively, you can generate the engine file using the trtexec command.
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec

Thanks!

AakankshaS · December 3, 2020, 6:52pm

Hi @mke489,
did you try running your model with the latest script?

You can use the below command with your model name and shape to generate the engine/trt file.
trtexec --onnx=your_model.onnx --verbose --explicitBatch --shapes=input_name:64x3x288x288 --saveEngine=engineName.engine

Thanks!

AakankshaS · December 3, 2020, 7:27pm

Hi @mke489,

In the running dir, after running this command, an engine file will be generated which you can use for inferencing.
Also, you will need to change the input shape of your model.
Please refer to the link below for details:
https://github.com/NVIDIA/TensorRT/tree/master/samples/opensource/trtexec
Thanks!

AakankshaS · January 28, 2021, 5:15pm

Hi @mke489
I can see from the logs that you are able to run your model successfully using trtexec.
You can use --saveEngine to save your model for inference.

Thanks!

AakankshaS · February 8, 2021, 4:24am

Hi @mke489
You can refer to the below link to perform inference using C++

Thanks

mke489 · February 9, 2021, 5:35pm

it doesn’t say anywhere how to perform inference with .trt files

Topic		Replies	Views
[TensorRT] ERROR: input: dynamic input is missing dimensions in profile 0 TensorRT	11	6783	October 12, 2021
Failed to build engine caused by the dynamic input error TensorRT tensorrt	2	963	November 21, 2022
Onnx with dynamic batch cannot be parsed TensorRT tensorrt	12	1508	August 9, 2021
Batch Inference Wrong in Python API TensorRT	15	3535	October 12, 2021
TensorRT Support for 5D input tensor TensorRT	9	1373	September 8, 2021
TensorRT С++ optimization profile TensorRT tensorrt , opencv , cuda	29	3024	September 9, 2021
Errors with reading pb file in TensorRT and readNetFromTensorflow in C++ TensorRT	3	1233	January 26, 2021
Assertion Error in buildMemGraph: 0 (mg.nodes[mg.regionIndices[outputRegion]].size == mg.nodes[mg.regionIndices[inputRegion]].size) TensorRT	10	1285	October 12, 2021
Trtexec onnx inception_v3 TensorRT	8	1263	March 3, 2021
Trtexec failed to create an engine from onnx file with fp16 TensorRT	7	1196	July 8, 2022

TensorRT: input_1: dynamic input is missing dimensions in profile 0

Related topics