TensorRT7.0.0 NGC simple tf-trt test problem

I have problem with executing this simple test case with TensorRT7.0.0 from NGC docker image.


""" Simple model conversion test """
    import os
    import shutil
    import tensorflow as tf


    class SimpleModelBuilderTest(tf.test.TestCase):
        """ SimpleModelBuilderTest class """

        model_name = "SimpleModel"
        pb_model = ""
        trt_model = ""

        def testSimpleModelConversion(self):
            loaded_trt_model = tf.saved_model.load(self.trt_model).signatures[
                "serving_default"
            ]

            node_name_to_op = {
                node.name: node.op for node in loaded_trt_model.graph.as_graph_def().node
            }

            self.assertEqual(
                3,
                len(loaded_trt_model.graph.as_graph_def().node),
                "Test converted simple model number of operations",
            )
            self.assertEqual(
                {
                    "flatten_input": "Placeholder",
                    "PartitionedCall": "PartitionedCall",
                    "Identity": "Identity",
                },
                node_name_to_op,
                "Test converted simple model structure",
            )

        def setUp(self):
            model = tf.keras.models.Sequential(
                [
                    tf.keras.layers.Flatten(input_shape=[28, 28]),
                    tf.keras.layers.Dense(128, activation="relu"),
                    tf.keras.layers.Dense(10),
                ]
            )

            loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
            model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])
            self.pb_model = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), (self.model_name + "_pb")
            )
            self.trt_model = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), (self.model_name + "_trt")
            )

            if not os.path.exists(self.pb_model):
                os.mkdir(self.pb_model)

            if not os.path.exists(self.trt_model):
                os.mkdir(self.trt_model)

            tf.saved_model.save(model, self.pb_model)
            params = tf.experimental.tensorrt.ConversionParams(precision_mode="FP16")
            converter = tf.experimental.tensorrt.Converter(
                input_saved_model_dir=self.pb_model, conversion_params=params
            )
            converter.convert()

            def _my_input_fn():
                for _ in range(10):
                    yield [tf.ones([28, 28])]

            converter.build(input_fn=_my_input_fn)
            converter.save(self.trt_model)

        def tearDown(self):
            if os.path.exists(self.pb_model):
                shutil.rmtree(self.pb_model)

            if os.path.exists(self.trt_model):
                shutil.rmtree(self.trt_model)


    if __name__ == "__main__":
        tf.test.main()

If I run this with TensorRT7.1.3 (in non-docker environment on my host machine) it runs successfully:

2020-07-14 13:28:39.367411: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
Running tests under Python 3.6.9: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/tf3/bin/python
[ RUN      ] SimpleModelBuilderTest.testSimpleModelConversion
2020-07-14 13:28:40.749444: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-07-14 13:28:40.771894: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.772406: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:40.772422: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:40.785348: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:40.793098: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:40.794967: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:40.808926: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:40.811150: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:40.811741: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:40.811806: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.812310: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.812906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:40.813586: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-07-14 13:28:40.843677: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 3600000000 Hz
2020-07-14 13:28:40.844910: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x537d1f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-07-14 13:28:40.844926: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-07-14 13:28:40.909755: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.910085: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x537f330 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-07-14 13:28:40.910097: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): GeForce GTX 1080 Ti, Compute Capability 6.1
2020-07-14 13:28:40.910814: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.911064: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:40.911081: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:40.911092: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:40.911099: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:40.911106: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:40.911113: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:40.911120: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:40.911128: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:40.911155: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.911407: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:40.911651: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:40.911990: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:41.364969: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 13:28:41.364989: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2020-07-14 13:28:41.365092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2020-07-14 13:28:41.365434: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.365706: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.366227: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 8392 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
/home/dellboy/git/recogni/gpu-demo-git/gpu-demo/tf3/lib/python3.6/site-packages/tensorflow/python/framework/tensor_util.py:528: DeprecationWarning: tostring() is deprecated. Use tobytes() instead.
  tensor_proto.tensor_content = nparray.tostring()
2020-07-14 13:28:41.536641: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
INFO:tensorflow:Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_pb/assets
I0714 13:28:41.678989 140651616806720 builder_impl.py:775] Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_pb/assets
2020-07-14 13:28:41.710480: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libnvinfer.so.7
INFO:tensorflow:Linked TensorRT version: (7, 1, 3)
I0714 13:28:41.710768 140651616806720 trt_convert.py:264] Linked TensorRT version: (7, 1, 3)
INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)
I0714 13:28:41.710863 140651616806720 trt_convert.py:265] Loaded TensorRT version: (7, 1, 3)
2020-07-14 13:28:41.774101: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.774333: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2020-07-14 13:28:41.774605: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 13:28:41.774911: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.775057: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:41.775072: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:41.775083: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:41.775090: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:41.775097: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:41.775105: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:41.775111: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:41.775118: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:41.775186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.775337: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.775516: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:41.775547: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 13:28:41.775551: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2020-07-14 13:28:41.775554: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2020-07-14 13:28:41.775594: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.775749: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.775886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 8392 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-07-14 13:28:41.786768: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: graph_to_optimize
2020-07-14 13:28:41.786780: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: Graph size after: 25 nodes (18), 24 edges (17), time = 3.29ms.
2020-07-14 13:28:41.786783: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: function_optimizer did nothing. time = 0.008ms.
2020-07-14 13:28:41.802347: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.802719: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2020-07-14 13:28:41.802771: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 13:28:41.803011: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.803158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:41.803175: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:41.803185: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:41.803194: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:41.803201: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:41.803207: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:41.803213: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:41.803220: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:41.803248: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.803402: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.803580: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:41.803598: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 13:28:41.803602: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2020-07-14 13:28:41.803605: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2020-07-14 13:28:41.803647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.803803: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:41.803942: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 8392 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-07-14 13:28:41.817940: I tensorflow/compiler/tf2tensorrt/segment/segment.cc:772] There are 5 ops of 3 different types in the graph that are not converted to TensorRT: Placeholder, Identity, NoOp, (For more information see https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#supported-ops).
2020-07-14 13:28:41.818328: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:751] Number of TensorRT candidate segments: 1
2020-07-14 13:28:41.819409: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:854] Replaced segment 0 consisting of 12 nodes by TRTEngineOp_0_0.
2020-07-14 13:28:41.822417: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: tf_graph
2020-07-14 13:28:41.822428: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 17 nodes (-8), 16 edges (-8), time = 7.053ms.
2020-07-14 13:28:41.822431: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   layout: Graph size after: 17 nodes (0), 16 edges (0), time = 2.666ms.
2020-07-14 13:28:41.822434: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 17 nodes (0), 16 edges (0), time = 0.404ms.
2020-07-14 13:28:41.822437: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   TensorRTOptimizer: Graph size after: 6 nodes (-11), 3 edges (-13), time = 3.841ms.
2020-07-14 13:28:41.822440: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 4 nodes (-2), 3 edges (0), time = 0.21ms.
2020-07-14 13:28:41.822443: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: TRTEngineOp_0_0_native_segment
2020-07-14 13:28:41.822446: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.363ms.
2020-07-14 13:28:41.822448: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   layout: Graph size after: 14 nodes (0), 13 edges (0), time = 0.312ms.
2020-07-14 13:28:41.822451: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.287ms.
2020-07-14 13:28:41.822455: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   TensorRTOptimizer: Graph size after: 14 nodes (0), 13 edges (0), time = 0.028ms.
2020-07-14 13:28:41.822458: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.291ms.
2020-07-14 13:28:41.847477: W tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:588] Running native segment forTRTEngineOp_0_0 due to failure in verifying input shapes: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0_0: [[28,28]] != [[?,28,28]]
2020-07-14 13:28:41.849039: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:42.048593: W tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:588] Running native segment forTRTEngineOp_0_0 due to failure in verifying input shapes: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0_0: [[28,28]] != [[?,28,28]]
2020-07-14 13:28:42.048926: W tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:588] Running native segment forTRTEngineOp_0_0 due to failure in verifying input shapes: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0_0: [[28,28]] != [[?,28,28]]
2020-07-14 13:28:42.049302: W tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:588] Running native segment forTRTEngineOp_0_0 due to failure in verifying input shapes: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0_0: [[28,28]] != [[?,28,28]]
2020-07-14 13:28:42.049665: W tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:588] Running native segment forTRTEngineOp_0_0 due to failure in verifying input shapes: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0_0: [[28,28]] != [[?,28,28]]
INFO:tensorflow:Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_trt/assets
I0714 13:28:42.131176 140651616806720 builder_impl.py:775] Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_trt/assets
[       OK ] SimpleModelBuilderTest.testSimpleModelConversion
[ RUN      ] SimpleModelBuilderTest.test_session
WARNING:tensorflow:Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_0_0._serialized_trt_resource_filename
W0714 13:28:42.275686 140651616806720 util.py:150] Unresolved object in checkpoint: (root).trt_engine_resources.TRTEngineOp_0_0._serialized_trt_resource_filename
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
W0714 13:28:42.275778 140651616806720 util.py:158] A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
INFO:tensorflow:Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_pb/assets
I0714 13:28:42.402992 140651616806720 builder_impl.py:775] Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_pb/assets
INFO:tensorflow:Linked TensorRT version: (7, 1, 3)
I0714 13:28:42.406728 140651616806720 trt_convert.py:264] Linked TensorRT version: (7, 1, 3)
INFO:tensorflow:Loaded TensorRT version: (7, 1, 3)
I0714 13:28:42.406787 140651616806720 trt_convert.py:265] Loaded TensorRT version: (7, 1, 3)
2020-07-14 13:28:42.501322: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.501487: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2020-07-14 13:28:42.501523: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 13:28:42.501728: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.501872: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:42.501888: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:42.501898: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:42.501905: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:42.501914: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:42.501921: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:42.501928: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:42.501934: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:42.501960: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.502112: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.502244: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:42.502259: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 13:28:42.502263: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2020-07-14 13:28:42.502266: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2020-07-14 13:28:42.502304: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.502459: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.502597: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 8392 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-07-14 13:28:42.503868: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: graph_to_optimize
2020-07-14 13:28:42.503879: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: Graph size after: 25 nodes (18), 24 edges (17), time = 0.454ms.
2020-07-14 13:28:42.503883: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   function_optimizer: function_optimizer did nothing. time = 0.008ms.
2020-07-14 13:28:42.518413: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.518610: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2020-07-14 13:28:42.518682: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 13:28:42.518946: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.519121: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.607GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2020-07-14 13:28:42.519136: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 13:28:42.519147: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-07-14 13:28:42.519154: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-07-14 13:28:42.519177: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-07-14 13:28:42.519184: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-07-14 13:28:42.519192: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-07-14 13:28:42.519199: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.8
2020-07-14 13:28:42.519227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.519382: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.519562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-07-14 13:28:42.519596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 13:28:42.519601: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 
2020-07-14 13:28:42.519605: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N 
2020-07-14 13:28:42.519647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.519847: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-07-14 13:28:42.519999: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 8392 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-07-14 13:28:42.523035: I tensorflow/compiler/tf2tensorrt/segment/segment.cc:772] There are 5 ops of 3 different types in the graph that are not converted to TensorRT: Placeholder, Identity, NoOp, (For more information see https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#supported-ops).
2020-07-14 13:28:42.523080: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:751] Number of TensorRT candidate segments: 1
2020-07-14 13:28:42.523397: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:854] Replaced segment 0 consisting of 12 nodes by TRTEngineOp_1_0.
2020-07-14 13:28:42.526199: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: tf_graph
2020-07-14 13:28:42.526209: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 17 nodes (-8), 16 edges (-8), time = 1.075ms.
2020-07-14 13:28:42.526213: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   layout: Graph size after: 17 nodes (0), 16 edges (0), time = 0.333ms.
2020-07-14 13:28:42.526216: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 17 nodes (0), 16 edges (0), time = 0.341ms.
2020-07-14 13:28:42.526219: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   TensorRTOptimizer: Graph size after: 6 nodes (-11), 3 edges (-13), time = 0.981ms.
2020-07-14 13:28:42.526222: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 4 nodes (-2), 3 edges (0), time = 0.307ms.
2020-07-14 13:28:42.526224: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:816] Optimization results for grappler item: TRTEngineOp_1_0_native_segment
2020-07-14 13:28:42.526227: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.287ms.
2020-07-14 13:28:42.526230: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   layout: Graph size after: 14 nodes (0), 13 edges (0), time = 0.314ms.
2020-07-14 13:28:42.526232: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.288ms.
2020-07-14 13:28:42.526235: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   TensorRTOptimizer: Graph size after: 14 nodes (0), 13 edges (0), time = 0.029ms.
2020-07-14 13:28:42.526238: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:818]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.295ms.
INFO:tensorflow:Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_trt/assets
I0714 13:28:42.605518 140651616806720 builder_impl.py:775] Assets written to: /home/dellboy/git/recogni/gpu-demo-git/gpu-demo/test/SimpleModel_trt/assets
WARNING:tensorflow:From /usr/lib/python3.6/contextlib.py:60: TensorFlowTestCase.test_session (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `self.session()` or `self.cached_session()` instead.
W0714 13:28:42.611315 140651616806720 deprecation.py:323] From /usr/lib/python3.6/contextlib.py:60: TensorFlowTestCase.test_session (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `self.session()` or `self.cached_session()` instead.
[       OK ] SimpleModelBuilderTest.test_session
----------------------------------------------------------------------
Ran 2 tests in 1.876s

OK

But when it’s run with NGC nvcr.io/nvidia/tensorflow:20.03-tf2-py3 then I got the following error:

Running tests under Python 3.6.9: /usr/bin/python
[ RUN      ] SimpleModelBuilderTest.testSimpleModelConversion
2020-07-14 11:28:23.557441: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-07-14 11:28:23.639944: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:23.640739: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:23.641506: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:23.642259: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:23.642493: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:23.643988: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:23.645434: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:23.645701: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:23.647288: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:23.648162: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:23.651691: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:23.657589: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:23.657888: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX512F
2020-07-14 11:28:24.185383: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:24.186120: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:24.186839: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:24.187545: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:24.187592: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:24.187605: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:24.187616: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:24.187629: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:24.187643: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:24.187657: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:24.187669: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:24.193071: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:24.193110: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:24.196384: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 11:28:24.196400: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 1 2 3 
2020-07-14 11:28:24.196409: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N N N N 
2020-07-14 11:28:24.196415: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 1:   N N N N 
2020-07-14 11:28:24.196421: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 2:   N N N N 
2020-07-14 11:28:24.196427: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 3:   N N N N 
2020-07-14 11:28:24.200635: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5)
2020-07-14 11:28:24.202325: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10202 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5)
2020-07-14 11:28:24.203963: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10202 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5)
2020-07-14 11:28:24.205618: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10178 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5)
2020-07-14 11:28:24.892780: W tensorflow/python/util/util.cc:329] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
W0714 11:28:25.064065 140134759458624 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /workspace/gpu-demo/test/SimpleModel_pb/assets
I0714 11:28:25.139219 140134759458624 builder_impl.py:775] Assets written to: /workspace/gpu-demo/test/SimpleModel_pb/assets
2020-07-14 11:28:25.156946: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer.so.7
INFO:tensorflow:Linked TensorRT version: (7, 0, 0)
I0714 11:28:25.157006 140134759458624 trt_convert.py:264] Linked TensorRT version: (7, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (7, 0, 0)
I0714 11:28:25.157124 140134759458624 trt_convert.py:265] Loaded TensorRT version: (7, 0, 0)
2020-07-14 11:28:25.259986: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 4
2020-07-14 11:28:25.260078: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 11:28:25.260980: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:25.261683: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:25.262374: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:25.263060: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:25.263090: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:25.263101: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:25.263115: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:25.263125: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:25.263137: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:25.263146: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:25.263156: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:25.268188: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:26.191270: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 11:28:26.191310: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 1 2 3 
2020-07-14 11:28:26.191317: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N N N N 
2020-07-14 11:28:26.191321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 1:   N N N N 
2020-07-14 11:28:26.191325: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 2:   N N N N 
2020-07-14 11:28:26.191330: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 3:   N N N N 
2020-07-14 11:28:26.194655: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5)
2020-07-14 11:28:26.195367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10202 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5)
2020-07-14 11:28:26.196062: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10202 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5)
2020-07-14 11:28:26.196771: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10178 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5)
2020-07-14 11:28:26.200952: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: graph_to_optimize
2020-07-14 11:28:26.200973: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: Graph size after: 26 nodes (19), 36 edges (29), time = 0.681ms.
2020-07-14 11:28:26.200979: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: function_optimizer did nothing. time = 0.016ms.
2020-07-14 11:28:26.227635: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 4
2020-07-14 11:28:26.227730: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 11:28:26.228503: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.229208: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.229893: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.230576: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.230610: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:26.230620: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:26.230630: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:26.230639: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:26.230649: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:26.230658: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:26.230668: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:26.235667: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:26.235882: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 11:28:26.235890: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 1 2 3 
2020-07-14 11:28:26.235900: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N N N N 
2020-07-14 11:28:26.235907: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 1:   N N N N 
2020-07-14 11:28:26.235914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 2:   N N N N 
2020-07-14 11:28:26.235921: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 3:   N N N N 
2020-07-14 11:28:26.239066: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5)
2020-07-14 11:28:26.239773: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10202 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5)
2020-07-14 11:28:26.240471: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10202 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5)
2020-07-14 11:28:26.241176: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10178 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5)
2020-07-14 11:28:26.247880: I tensorflow/compiler/tf2tensorrt/segment/segment.cc:460] There are 6 ops of 3 different types in the graph that are not converted to TensorRT: Identity, NoOp, Placeholder, (For more information see https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#supported-ops).
2020-07-14 11:28:26.247963: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:643] Number of TensorRT candidate segments: 1
2020-07-14 11:28:26.248686: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:745] Replaced segment 0 consisting of 12 nodes by TRTEngineOp_0.
2020-07-14 11:28:26.253086: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: tf_graph
2020-07-14 11:28:26.253106: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 22 nodes (-4), 28 edges (-8), time = 1.639ms.
2020-07-14 11:28:26.253114: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   layout: Graph size after: 22 nodes (0), 28 edges (0), time = 0.787ms.
2020-07-14 11:28:26.253121: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 22 nodes (0), 28 edges (0), time = 0.637ms.
2020-07-14 11:28:26.253127: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   TensorRTOptimizer: Graph size after: 11 nodes (-11), 12 edges (-16), time = 1.698ms.
2020-07-14 11:28:26.253134: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 9 nodes (-2), 10 edges (-2), time = 0.472ms.
2020-07-14 11:28:26.253141: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: TRTEngineOp_0_native_segment
2020-07-14 11:28:26.253147: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.471ms.
2020-07-14 11:28:26.253153: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   layout: Graph size after: 14 nodes (0), 13 edges (0), time = 0.493ms.
2020-07-14 11:28:26.253160: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.461ms.
2020-07-14 11:28:26.253166: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   TensorRTOptimizer: Graph size after: 14 nodes (0), 13 edges (0), time = 0.043ms.
2020-07-14 11:28:26.253173: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.434ms.
2020-07-14 11:28:26.273895: W tensorflow/core/framework/op_kernel.cc:1753] OP_REQUIRES failed at trt_engine_op.cc:563 : Invalid argument: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0: [[28,28]] != [[?,28,28]]
[  FAILED  ] SimpleModelBuilderTest.testSimpleModelConversion
[ RUN      ] SimpleModelBuilderTest.test_session
INFO:tensorflow:Assets written to: /workspace/gpu-demo/test/SimpleModel_pb/assets
I0714 11:28:26.558778 140134759458624 builder_impl.py:775] Assets written to: /workspace/gpu-demo/test/SimpleModel_pb/assets
INFO:tensorflow:Linked TensorRT version: (7, 0, 0)
I0714 11:28:26.565369 140134759458624 trt_convert.py:264] Linked TensorRT version: (7, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (7, 0, 0)
I0714 11:28:26.565489 140134759458624 trt_convert.py:265] Loaded TensorRT version: (7, 0, 0)
2020-07-14 11:28:26.668188: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 4
2020-07-14 11:28:26.668274: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 11:28:26.669083: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.669785: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.670474: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.671158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.671191: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:26.671202: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:26.671211: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:26.671221: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:26.671231: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:26.671241: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:26.671249: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:26.676247: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:26.676465: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 11:28:26.676474: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 1 2 3 
2020-07-14 11:28:26.676480: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N N N N 
2020-07-14 11:28:26.676486: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 1:   N N N N 
2020-07-14 11:28:26.676491: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 2:   N N N N 
2020-07-14 11:28:26.676497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 3:   N N N N 
2020-07-14 11:28:26.679631: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5)
2020-07-14 11:28:26.680327: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10202 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5)
2020-07-14 11:28:26.681021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10202 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5)
2020-07-14 11:28:26.681710: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10178 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5)
2020-07-14 11:28:26.685243: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: graph_to_optimize
2020-07-14 11:28:26.685262: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: Graph size after: 26 nodes (19), 36 edges (29), time = 0.632ms.
2020-07-14 11:28:26.685269: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   function_optimizer: function_optimizer did nothing. time = 0.016ms.
2020-07-14 11:28:26.710689: I tensorflow/core/grappler/devices.cc:55] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 4
2020-07-14 11:28:26.710768: I tensorflow/core/grappler/clusters/single_machine.cc:356] Starting new session
2020-07-14 11:28:26.711535: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:19:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.712228: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 1 with properties: 
pciBusID: 0000:1a:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.712922: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 2 with properties: 
pciBusID: 0000:67:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.713606: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 3 with properties: 
pciBusID: 0000:68:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-07-14 11:28:26.713636: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-07-14 11:28:26.713646: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-07-14 11:28:26.713657: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-07-14 11:28:26.713666: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-07-14 11:28:26.713676: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-07-14 11:28:26.713685: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-07-14 11:28:26.713694: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-07-14 11:28:26.718724: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1703] Adding visible gpu devices: 0, 1, 2, 3
2020-07-14 11:28:26.718939: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-07-14 11:28:26.718947: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1108]      0 1 2 3 
2020-07-14 11:28:26.718954: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 0:   N N N N 
2020-07-14 11:28:26.718960: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 1:   N N N N 
2020-07-14 11:28:26.718966: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 2:   N N N N 
2020-07-14 11:28:26.718972: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1121] 3:   N N N N 
2020-07-14 11:28:26.722106: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10202 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:19:00.0, compute capability: 7.5)
2020-07-14 11:28:26.722803: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10202 MB memory) -> physical GPU (device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:1a:00.0, compute capability: 7.5)
2020-07-14 11:28:26.723493: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:2 with 10202 MB memory) -> physical GPU (device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:67:00.0, compute capability: 7.5)
2020-07-14 11:28:26.724198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1247] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:3 with 10178 MB memory) -> physical GPU (device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:68:00.0, compute capability: 7.5)
2020-07-14 11:28:26.730694: I tensorflow/compiler/tf2tensorrt/segment/segment.cc:460] There are 6 ops of 3 different types in the graph that are not converted to TensorRT: Identity, NoOp, Placeholder, (For more information see https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#supported-ops).
2020-07-14 11:28:26.730770: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:643] Number of TensorRT candidate segments: 1
2020-07-14 11:28:26.731222: I tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc:745] Replaced segment 0 consisting of 12 nodes by TRTEngineOp_0.
2020-07-14 11:28:26.738247: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: tf_graph
2020-07-14 11:28:26.738281: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 22 nodes (-4), 28 edges (-8), time = 1.609ms.
2020-07-14 11:28:26.738294: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   layout: Graph size after: 22 nodes (0), 28 edges (0), time = 0.739ms.
2020-07-14 11:28:26.738305: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 22 nodes (0), 28 edges (0), time = 0.593ms.
2020-07-14 11:28:26.738316: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   TensorRTOptimizer: Graph size after: 11 nodes (-11), 12 edges (-16), time = 1.371ms.
2020-07-14 11:28:26.738327: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 9 nodes (-2), 10 edges (-2), time = 0.446ms.
2020-07-14 11:28:26.738337: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:797] Optimization results for grappler item: TRTEngineOp_0_native_segment
2020-07-14 11:28:26.738348: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.83ms.
2020-07-14 11:28:26.738359: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   layout: Graph size after: 14 nodes (0), 13 edges (0), time = 1.053ms.
2020-07-14 11:28:26.738369: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.909ms.
2020-07-14 11:28:26.738380: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   TensorRTOptimizer: Graph size after: 14 nodes (0), 13 edges (0), time = 0.078ms.
2020-07-14 11:28:26.738390: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:799]   constant_folding: Graph size after: 14 nodes (0), 13 edges (0), time = 0.933ms.
2020-07-14 11:28:26.765233: W tensorflow/core/framework/op_kernel.cc:1753] OP_REQUIRES failed at trt_engine_op.cc:563 : Invalid argument: Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0: [[28,28]] != [[?,28,28]]
[  FAILED  ] SimpleModelBuilderTest.test_session
======================================================================
ERROR: testSimpleModelConversion (__main__.SimpleModelBuilderTest)
testSimpleModelConversion (__main__.SimpleModelBuilderTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "simple_model_test.py", line 73, in setUp
    converter.build(input_fn=_my_input_fn)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/compiler/tensorrt/trt_convert.py", line 1174, in build
    func(*map(ops.convert_to_tensor, inp))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1605, in __call__
    return self._call_impl(args, kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1645, in _call_impl
    return self._call_flat(args, self.captured_inputs, cancellation_manager)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1746, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 598, in call
    ctx=ctx)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py", line 60, in quick_execute
    inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError:  Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0: [[28,28]] != [[?,28,28]]
	 [[node TRTEngineOp_0 (defined at simple_model_test.py:67) ]] [Op:__inference_pruned_897]

Function call stack:
pruned


======================================================================
ERROR: test_session (__main__.SimpleModelBuilderTest)
test_session (__main__.SimpleModelBuilderTest)
Use cached_session instead. (deprecated)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "simple_model_test.py", line 73, in setUp
    converter.build(input_fn=_my_input_fn)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/compiler/tensorrt/trt_convert.py", line 1174, in build
    func(*map(ops.convert_to_tensor, inp))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1605, in __call__
    return self._call_impl(args, kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1645, in _call_impl
    return self._call_flat(args, self.captured_inputs, cancellation_manager)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 1746, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 598, in call
    ctx=ctx)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py", line 60, in quick_execute
    inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError:  Input shapes do not match input partial shapes stored in graph, for TRTEngineOp_0: [[28,28]] != [[?,28,28]]
	 [[node TRTEngineOp_0 (defined at simple_model_test.py:67) ]] [Op:__inference_pruned_1794]

Function call stack:
pruned


----------------------------------------------------------------------
Ran 2 tests in 3.234s

FAILED (errors=2)

Btw, this forum software is awful for posting.