Errors with reading pb file in TensorRT and readNetFromTensorflow in C++

I have Python code along with TensorRT with Docker container 20.03, which has CUDA 10.2 and TensorRT 7.0.0

from __future__ import print_function

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imutils.paths import list_images
from keras import backend as K
from keras.callbacks import CSVLogger
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam
from scipy.io import loadmat
from scipy.misc import imread
from skimage.io import imsave
from skimage.transform import resize

from scipy.io import loadmat
import cv2
import tensorflow as tf

# Side length, in pixels, of the square single-channel images fed to the
# network.  The training/test arrays in this script are allocated as
# 1024x1024, so the model input has to use the same size.
Ny = 1024
target_size = (Ny, Ny)

# NOTE(review): presumably the learning rate; it is not referenced anywhere in
# the visible part of the script -- confirm where it is consumed.
LR = 1e-4
# E is passed as `epochs` and BS as `batch_size` when the model is fitted.
E, BS = 2, 4

def get_unet(img_rows, img_cols):
    """Build an (uncompiled) U-Net for single-channel images.

    The network has four encoder stages (32, 64, 128, 256 filters), each
    followed by 2x2 max pooling, a 512-filter bottleneck with dropout, and
    four decoder stages that upsample by 2 and concatenate the matching
    encoder output on the channel axis.  A final 1x1 sigmoid convolution
    produces a one-channel map.

    Args:
        img_rows: Input height in pixels.
        img_cols: Input width in pixels.

    Returns:
        A ``Model`` mapping an ``(img_rows, img_cols, 1)`` input to the
        one-channel sigmoid output.  The model is not compiled here.

    Note:
        Four 2x poolings are undone by four 2x upsamplings, so the skip
        concatenations only line up when both dimensions are multiples of 16.
    """

    def _encoder_stage(tensor, filters):
        """Apply two 3x3 ReLU convolutions, each followed by batch norm."""
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
            tensor = BatchNormalization()(tensor)
        return tensor

    def _decoder_stage(tensor, skip, filters):
        """Upsample 2x, concatenate with `skip`, then two 3x3 ReLU convolutions."""
        tensor = concatenate([UpSampling2D(size=(2, 2))(tensor), skip], axis=3)
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    inputs = Input((img_rows, img_cols, 1))

    # Contracting path.
    conv1 = _encoder_stage(inputs, 32)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = _encoder_stage(pool1, 64)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = _encoder_stage(pool2, 128)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = _encoder_stage(pool3, 256)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    # Bottleneck.
    conv5 = _encoder_stage(pool4, 512)
    conv5 = Dropout(0.5)(conv5)

    # Expanding path; each stage reuses the encoder output of the same scale.
    conv6 = _decoder_stage(conv5, conv4, 256)
    conv7 = _decoder_stage(conv6, conv3, 128)
    conv8 = _decoder_stage(conv7, conv2, 64)
    conv9 = _decoder_stage(conv8, conv1, 32)

    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

    # Simplified variant kept for reference (it was a never-called nested
    # function here, i.e. dead code).  As posted:
    #
    #     def get_unet(img_rows, img_cols):
    #         inputs = Input((img_rows, img_cols, 1))
    #         conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
    #
    # NOTE(review): with nothing else in the body the last line presumably has
    # to be applied to `inputs`, not `conv9` -- confirm.
    model = Model(inputs=[inputs], outputs=[conv10])

    return model

# One-sample "dataset": a single 1024x1024 grayscale image that is used both
# as the input and as its own label, for training and for testing alike.
train_images = np.zeros([1, 1024, 1024, 1], dtype=float)
annot_train = np.zeros([1, 1024, 1024, 1], dtype=float)
test_images = np.zeros([1, 1024, 1024, 1], dtype=float)
annot_test = np.zeros([1, 1024, 1024, 1], dtype=float)

# Flag 0 loads the file as single-channel grayscale; dividing by 255 scales
# the 8-bit pixel values into [0, 1].
img = cv2.imread('owlResized.bmp', 0) / 255.0
label = cv2.imread('owlResized.bmp', 0) / 255.0
train_images[0, :, :, 0], annot_train[0, :, :, 0] = img, label
test_images[0, :, :, 0], annot_test[0, :, :, 0] = img, label

print("finished reading")
# Stack test and train samples along the batch axis: C holds the labels,
# I holds the input images.
C = np.concatenate([annot_test, annot_train])
I = np.concatenate([test_images, train_images])

unet = get_unet(Ny, Ny)
# NOTE(review): no `unet.compile(...)` call is visible before fitting; Keras
# requires one, so it was presumably lost from the listing -- confirm the
# optimizer and loss that were used.
history = unet.fit(I, C, verbose=2, epochs=E, batch_size=BS, validation_split=0.1)

# save pb
unet.save('owlSimple', overwrite=True, include_optimizer=False, save_format='tf')

Then did

python3 -m tf2onnx.convert --opset 12 --saved-model ./owlSimple --output owlSimple12.onnx

I also tried opset of 9,10,11. They all seemed to convert successfully

When I tried to run the opset 11 and opset 12 models in TensorRT, I got this error:

ERROR: ModelImporter.cpp:92 In function parseGraph:
[8] Assertion failed: convertOnnxWeights(initializer, &weights, ctx)
ERROR: could not parse the model.

Segmentation fault (core dumped)

With opset 10, I got:

While parsing node number 45 [Resize]:
ERROR: builtin_op_importers.cpp:2412 In function importResize:
[8] Assertion failed: scales.is_weights() && "Resize scales must be an initializer!"
ERROR: could not parse the model.

and with opset 9 I got:

While parsing node number 45 [Upsample]:
ERROR: builtin_op_importers.cpp:3240 In function importUpsample:
[8] Assertion failed: scales_input.is_weights()
ERROR: could not parse the model.

I got those errors even when I did


If I further simplified the Unet by replacing the Unet with

inputs = Input((img_rows, img_cols, 1))
conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

as seen in the commented out code above, then I get these errors when running the various opsets in TensorRT:

 input_2:0: dynamic input is missing dimensions in profile 0.
 Network validation failed.
 Segmentation fault (core dumped)

The TensorRT C++ I used is (It is based on code from

#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>
#include <algorithm>
#include <numeric>
#include <opencv2/dnn/dnn.hpp>

#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>

using namespace cv;

// utilities ----------------------------------------------------------------------------------------------------------
// class to log errors, warnings, and other information during the build and inference phases
class Logger : public nvinfer1::ILogger
    void log(Severity severity, const char* msg) override {
        // remove this 'if' if you need more logged info
        if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR)) {
            std::cout << msg << "\n";
} gLogger;

// destroy TensorRT objects if something goes wrong
// Deleter for std::unique_ptr: releases a TensorRT object through its own
// destroy() member instead of `delete`.
struct TRTDestroy
{
    template <class T>
    void operator()(T* obj) const
    {
        // Null is tolerated so that an empty smart pointer can be destroyed.
        if (obj)
        {
            obj->destroy();
        }
    }
};

// unique_ptr alias that owns a TensorRT object and destroys it via TRTDestroy.
template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;

// calculate size of tensor
// Return the number of elements described by `dims`, i.e. the product of the
// first dims.nbDims entries of dims.d (1 when nbDims is 0).
// NOTE(review): a negative extent (e.g. an unresolved dynamic dimension) would
// wrap around in size_t; callers should pass fully resolved dimensions.
size_t getSizeByDim(const nvinfer1::Dims& dims)
{
    size_t size = 1;
    // int index: avoids comparing an unsigned counter against dims.nbDims.
    for (int i = 0; i < dims.nbDims; ++i)
    {
        size *= dims.d[i];
    }
    return size;
}

// get classes names
// Read class names from a text file, one name per line.
//   imagenet_classes: path of the file to read.
// Returns the lines in file order. If the file cannot be opened, an error is
// written to stderr and an empty vector is returned.
std::vector<std::string> getClassNames(const std::string& imagenet_classes)
{
    std::ifstream classes_file(imagenet_classes);
    std::vector<std::string> classes;
    if (!classes_file.good())
    {
        std::cerr << "ERROR: can't read file with classes names.\n";
        return classes;
    }
    std::string class_name;
    while (std::getline(classes_file, class_name))
    {
        // Keep every line; without this the result would always be empty.
        classes.push_back(class_name);
    }
    return classes;
}

// preprocessing stage ------------------------------------------------------------------------------------------------
// Load an image, convert it to float in [0, 1] and write it into the device
// buffer that serves as the network input.
//   image_path: file to read.
//   gpu_input:  device pointer with room for height * width floats.
//   dims:       dimensions of the input binding. A 4-D value is read as
//               (batch, height, width, channels); otherwise d[0]/d[1] are
//               taken as height/width, as before.
// On a load failure an error is written to stderr and gpu_input is untouched.
void preprocessImage(const std::string& image_path, float* gpu_input, const nvinfer1::Dims& dims)
{
    // read input image as one channel, matching the single-channel network
    // input (the Python side also reads it with flag 0 = grayscale)
    cv::Mat frame = cv::imread(image_path, cv::IMREAD_GRAYSCALE);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load failed\n";
        return;
    }
    cv::cuda::GpuMat gpu_frame;
    // upload image to GPU
    gpu_frame.upload(frame);

    int channels = 1;
    // With an explicit-batch binding such as (1, 1024, 1024, 1), d[0] is the
    // batch size, so height/width live in d[1]/d[2].
    const bool has_batch_dim = (dims.nbDims == 4);
    auto input_height = has_batch_dim ? dims.d[1] : dims.d[0];
    auto input_width = has_batch_dim ? dims.d[2] : dims.d[1];
    auto input_size = cv::Size(input_width, input_height);

    // Only resize when the file does not already have the network's size.
    cv::cuda::GpuMat resized = gpu_frame;
    if (gpu_frame.size() != input_size)
    {
        cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_NEAREST);
    }

    cv::cuda::GpuMat flt_image;
    resized.convertTo(flt_image, CV_32FC1, 1.f / 255.f);

    // Wrap gpu_input in GpuMat headers (no copy) so that split() writes the
    // planes straight into the network's input buffer.
    std::vector<cv::cuda::GpuMat> chw;
    for (int i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
    }
    cv::cuda::split(flt_image, chw);
}

// post-processing stage ----------------------------------------------------------------------------------------------
// Copy the network output from the device and save it as an 8-bit image.
//   gpu_output: device pointer holding getSizeByDim(dims) * batch_size floats.
//   dims:       output binding dimensions; d[1] and d[2] are used as the
//               image rows and columns.
//   batch_size: number of samples in the output buffer.
// Writes "trt_output.bmp" in the current directory; on a failed device copy an
// error is written to stderr and no file is produced.
void postprocessResults(float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    auto status = cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
    if (status != cudaSuccess)
    {
        std::cerr << "ERROR: cudaMemcpy failed: " << cudaGetErrorString(status) << "\n";
        return;
    }

    auto rows = dims.d[1];
    auto cols = dims.d[2];
    cv::Mat Finalmat = cv::Mat(rows, cols, CV_32FC1); // one-channel float matrix that receives the output values

    // copy vector to mat, never writing past the end of the matrix
    const size_t count = std::min(cpu_output.size(), Finalmat.total());
    memcpy(Finalmat.data, cpu_output.data(), count * sizeof(float));
    // convertTo() only changes the depth; the result stays single-channel.
    Finalmat.convertTo(Finalmat, CV_8UC1, 255.0);
    cv::imwrite("trt_output.bmp", Finalmat);
}

// initialize TensorRT Context and Engine and parse ONNX model --------------------------------------------------------------------
void parseOnnxModel(const std::string& model_path, TRTUniquePtr<nvinfer1::ICudaEngine>& engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext>& context)
    nvinfer1::IBuilder *builder = nvinfer1::createInferBuilder(gLogger);
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition *network = builder->createNetworkV2(explicitBatch);
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
        std::cerr << "ERROR: could not parse the model.\n";

    //create Config to configure engine parameters such as max memory or set FP16 mode
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1,1024,1024,1));
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(1,1024,1024,1));
    profile->setDimensions("input_1", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(1,1024,1024,1));


    // allow TensorRT to use up to 1GB of GPU memory for tactic selection.
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())

    // we have only one image in batch
    // generate TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network, *config));

// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char* argv[])
    if (argc < 3)
        std::cerr << "usage: " << argv[0] << " [model_name].onnx [image_name].jpg\n";
        return -1;
    std::string model_path(argv[1]);
    std::string image_path(argv[2]);
    int batch_size = 1;

    TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
    TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
    parseOnnxModel(model_path, engine, context);

    // get sizes of input and output and allocate memory required for input data and for output data
    std::vector<nvinfer1::Dims> input_dims; // we expect only one input
    std::vector<nvinfer1::Dims> output_dims; // and one output

    context->setBindingDimensions(0, nvinfer1::Dims4(1, 1024,1024, 1));

    //getNbBindings is number of binding indices    

    // buffers for input and output data
    std::vector<void*> buffers(engine->getNbBindings()); 

    for (size_t i = 0; i < engine->getNbBindings(); ++i)
        if (engine->bindingIsInput(i))
        auto binding_size = getSizeByDim(context->getBindingDimensions(i)) * batch_size * sizeof(float);
        auto status = cudaMalloc(&buffers[i], binding_size);
        if (status)
            std::cout<<"cudaMalloc worked\n";

    if (input_dims.empty() || output_dims.empty())
        std::cerr << "Expect at least one input and one output for network\n";
        return -1;
    preprocessImage(image_path, (float *) buffers[0], input_dims[0]);
    postprocessResults((float *) buffers[1], output_dims[0], batch_size);

    for (void* buf : buffers)
    return 0;

I then tried to open the pb file with readNetFromTensorflow. I have saved_model.pb in /workspace/owlSimple/saved_model.pb and in /workspace. In /workspace, I’ve tried

    dnn:Net nt = cv::dnn::readNetFromTensorflow("saved_model.pb");
    dnn:Net nt = cv::dnn::readNetFromTensorflow("owlSimple/saved_model.pb");
    dnn:Net nt = cv::dnn::readNetFromTensorflow("./owlSimple/saved_model.pb");
    dnn:Net nt = cv::dnn::readNetFromTensorflow("/workspace/owlSimple/saved_model.pb");
    dnn:Net nt = cv::dnn::readNetFromTensorflow("/owlSimple/saved_model.pb");

But all of them are giving the same error of

String field 'opencv_tensorflow.FunctionDef.Node.ret' contains invalid UTF-8 data when parsing a protocol buffer. Use the 'bytes' type if you intend to send raw bytes.
what(): OpenCV(4.5.1-pre) /workspace/opencv/modules/dnn/src/tensorflow/tf_io.cpp:42: error: (-2:Unspecified error) FAILED: ReadProtoFromBinaryFile(param_file, param). Failed to parse GraphDef file: owlSimple/saved_model.pb in function 'ReadTFNetParamsFromBinaryFileOrDie'

Hi @mke489,

Please convert your pb file to ONNX
and then convert it to TRT.
If the issue still persists, please share the ONNX model.
For your reference: ONNX-TensorRT

Thank you.

I said I converted the pb file to ONNX with

python3 -m tf2onnx.convert --opset 12 --saved-model ./owlSimple --output owlSimple12.onnx

The various ONNX files I tried:

owlSimple8.onnx (1.4 KB)

owlSimple9.onnx (30.0 MB)

owlSimple12.onnx (30.0 MB) owlSimple11.onnx (30.0 MB) owlSimple10.onnx (30.0 MB)

Hi @mke489,

This is a known issue and fix will be available in future release.
Meanwhile you can refer to the below link to resolve it.

Thank you.

If I use TensorRT 6 and Docker 19.10, then with tf2onnx.convert --opset 9 I get this:

Input filename:   owlSimple9.onnx
ONNX IR version:  0.0.4
Opset version:    9
Producer name:    tf2onnx
Producer version: 1.7.2
Model version:    0
Doc string:       

	WARNING: ONNX model has a newer ir_version (0.0.4) than this parser was built against (0.0.3).
While parsing node number 0 [Reshape]:
ERROR: ModelImporter.cpp:296 In function importModel:
[5] Assertion failed: tensors.count(input_name)
ERROR: could not parse the model.
Segmentation fault (core dumped)

I get the same error with opset 10,11,12, except it says the newer ir_version is 0.0.5, 0.0.6, 0.0.7, respectively

can anyone help?