I have tried using torch.onnx.export to convert this model from PyTorch to ONNX, and then converting the ONNX model to TensorRT using trtexec.
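For the ONNX-to-TensorRT step I run trtexec along these lines (this is the general form, not my exact invocation; the engine path is just a placeholder):

trtexec --onnx=models/flat_model_56_10.onnx --saveEngine=models/flat_model_56_10.engine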
My code for converting from PyTorch to ONNX is the following:
import torch
from m2unet import M2UNet, Encoder

def main():
    modelpath = "models/flat_model_56_10.pth"
    savepath = "models/flat_model_56_10.onnx"
    NCH = 1
    BATCHSZ = 1
    im_sz = 1024
    v = 12
    sz = (BATCHSZ, NCH, im_sz, im_sz)  # size of input - batch, channels, x, y
    # define your PyTorch model and load the weights
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    encoder = Encoder(NUM_CHAN=NCH)
    model = M2UNet(encoder, NUM_CHAN=NCH).float().to(device).cuda().eval()
    model.load_state_dict(torch.load(modelpath))
    # create an example input
    input_data = torch.ones(sz).cuda()
    # export
    torch.onnx.export(model, input_data, savepath, verbose=True, opset_version=v)

if __name__ == "__main__":
    main()
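(As a side note, before handing the ONNX file to trtexec it can be worth sanity-checking the export with onnx and onnxruntime. The snippet below is only a minimal sketch of such a check, assuming both packages are installed; it is not part of my actual pipeline.)

import numpy as np
import onnx
import onnxruntime as ort

# structurally validate the exported graph
onnx_model = onnx.load("models/flat_model_56_10.onnx")
onnx.checker.check_model(onnx_model)

# run one CPU inference with the same dummy input shape used for export
sess = ort.InferenceSession("models/flat_model_56_10.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
dummy = np.ones((1, 1, 1024, 1024), dtype=np.float32)
outputs = sess.run(None, {input_name: dummy})
print([o.shape for o in outputs])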
Running this produced the following logs:
/home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/onnx/_internal/jit_utils.py:258: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.)
_C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
/home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/onnx/utils.py:687: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.)
_C._jit_pass_onnx_graph_shape_type_inference(
/home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/onnx/utils.py:1178: UserWarning: The shape inference of prim::Constant type is missing, so it may result in wrong shape inference for the exported graph. Please consider adding it in symbolic function. (Triggered internally at ../torch/csrc/jit/passes/onnx/shape_type_inference.cpp:1884.)
_C._jit_pass_onnx_graph_shape_type_inference(
Exported graph: graph(%input.1 : Float(1, 1, 1024, 1024, strides=[1048576, 1048576, 1024, 1], requires_grad=0, device=cuda:0),
%decode1.conv.0.weight : Float(1, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cuda:0),
%decode1.conv.0.bias : Float(1, strides=[1], requires_grad=1, device=cuda:0),
%onnx::Conv_479 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_480 : Float(32, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_482 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_483 : Float(32, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_485 : Float(16, 32, 1, 1, strides=[32, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_486 : Float(16, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_488 : Float(96, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_489 : Float(96, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_491 : Float(96, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_492 : Float(96, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_494 : Float(24, 96, 1, 1, strides=[96, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_495 : Float(24, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_497 : Float(144, 24, 1, 1, strides=[24, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_498 : Float(144, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_500 : Float(144, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_501 : Float(144, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_503 : Float(24, 144, 1, 1, strides=[144, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_504 : Float(24, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_506 : Float(144, 24, 1, 1, strides=[24, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_507 : Float(144, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_509 : Float(144, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_510 : Float(144, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_512 : Float(32, 144, 1, 1, strides=[144, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_513 : Float(32, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_515 : Float(192, 32, 1, 1, strides=[32, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_516 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_518 : Float(192, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_519 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_521 : Float(32, 192, 1, 1, strides=[192, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_522 : Float(32, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_524 : Float(192, 32, 1, 1, strides=[32, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_525 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_527 : Float(192, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_528 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_530 : Float(32, 192, 1, 1, strides=[192, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_531 : Float(32, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_533 : Float(192, 32, 1, 1, strides=[32, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_534 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_536 : Float(192, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_537 : Float(192, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_539 : Float(64, 192, 1, 1, strides=[192, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_540 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_542 : Float(384, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_543 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_545 : Float(384, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_546 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_548 : Float(64, 384, 1, 1, strides=[384, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_549 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_551 : Float(384, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_552 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_554 : Float(384, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_555 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_557 : Float(64, 384, 1, 1, strides=[384, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_558 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_560 : Float(384, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_561 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_563 : Float(384, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_564 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_566 : Float(64, 384, 1, 1, strides=[384, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_567 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_569 : Float(384, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_570 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_572 : Float(384, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_573 : Float(384, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_575 : Float(96, 384, 1, 1, strides=[384, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_576 : Float(96, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_578 : Float(576, 96, 1, 1, strides=[96, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_579 : Float(576, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_581 : Float(576, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_582 : Float(576, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_584 : Float(96, 576, 1, 1, strides=[576, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_585 : Float(96, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_587 : Float(576, 96, 1, 1, strides=[96, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_588 : Float(576, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_590 : Float(576, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_591 : Float(576, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_593 : Float(96, 576, 1, 1, strides=[576, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_594 : Float(96, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_596 : Float(19, 128, 1, 1, strides=[128, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_597 : Float(19, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_599 : Float(19, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_600 : Float(19, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_602 : Float(64, 19, 1, 1, strides=[19, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_603 : Float(64, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_605 : Float(13, 88, 1, 1, strides=[88, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_606 : Float(13, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_608 : Float(13, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_609 : Float(13, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_611 : Float(44, 13, 1, 1, strides=[13, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_612 : Float(44, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_614 : Float(9, 60, 1, 1, strides=[60, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_615 : Float(9, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_617 : Float(9, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_618 : Float(9, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_620 : Float(30, 9, 1, 1, strides=[9, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_621 : Float(30, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_623 : Float(5, 31, 1, 1, strides=[31, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_624 : Float(5, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_626 : Float(5, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_627 : Float(5, strides=[1], requires_grad=0, device=cuda:0),
%onnx::Conv_629 : Float(16, 5, 1, 1, strides=[5, 1, 1, 1], requires_grad=0, device=cuda:0),
%onnx::Conv_630 : Float(16, strides=[1], requires_grad=0, device=cuda:0)):
%/conv1/conv1.0/conv1.0.0/Conv_output_0 : Float(1, 32, 512, 512, strides=[8388608, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2], onnx_name="/conv1/conv1.0/conv1.0.0/Conv"](%input.1, %onnx::Conv_479, %onnx::Conv_480), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv1/torch.nn.modules.container.Sequential::conv1.0/torch.nn.modules.conv.Conv2d::conv1.0.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv1/conv1.0/conv1.0.2/Relu_output_0 : Float(1, 32, 512, 512, strides=[8388608, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv1/conv1.0/conv1.0.2/Relu"](%/conv1/conv1.0/conv1.0.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv1/torch.nn.modules.container.Sequential::conv1.0/torch.nn.modules.activation.ReLU::conv1.0.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv1/conv1.1/conv/conv.0/Conv_output_0 : Float(1, 32, 512, 512, strides=[8388608, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=32, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv1/conv1.1/conv/conv.0/Conv"](%/conv1/conv1.0/conv1.0.2/Relu_output_0, %onnx::Conv_482, %onnx::Conv_483), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv1/m2unet.InvertedResidual::conv1.1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv1/conv1.1/conv/conv.2/Relu_output_0 : Float(1, 32, 512, 512, strides=[8388608, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv1/conv1.1/conv/conv.2/Relu"](%/conv1/conv1.1/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv1/m2unet.InvertedResidual::conv1.1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv1/conv1.1/conv/conv.3/Conv_output_0 : Float(1, 16, 512, 512, strides=[4194304, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv1/conv1.1/conv/conv.3/Conv"](%/conv1/conv1.1/conv/conv.2/Relu_output_0, %onnx::Conv_485, %onnx::Conv_486), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv1/m2unet.InvertedResidual::conv1.1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.2/conv/conv.0/Conv_output_0 : Float(1, 96, 512, 512, strides=[25165824, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv2/conv2.2/conv/conv.0/Conv"](%/conv1/conv1.1/conv/conv.3/Conv_output_0, %onnx::Conv_488, %onnx::Conv_489), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.2/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.2/conv/conv.2/Relu_output_0 : Float(1, 96, 512, 512, strides=[25165824, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv2/conv2.2/conv/conv.2/Relu"](%/conv2/conv2.2/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.2/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv2/conv2.2/conv/conv.3/Conv_output_0 : Float(1, 96, 256, 256, strides=[6291456, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=96, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2], onnx_name="/conv2/conv2.2/conv/conv.3/Conv"](%/conv2/conv2.2/conv/conv.2/Relu_output_0, %onnx::Conv_491, %onnx::Conv_492), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.2/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.2/conv/conv.5/Relu_output_0 : Float(1, 96, 256, 256, strides=[6291456, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv2/conv2.2/conv/conv.5/Relu"](%/conv2/conv2.2/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.2/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv2/conv2.2/conv/conv.6/Conv_output_0 : Float(1, 24, 256, 256, strides=[1572864, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv2/conv2.2/conv/conv.6/Conv"](%/conv2/conv2.2/conv/conv.5/Relu_output_0, %onnx::Conv_494, %onnx::Conv_495), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.2/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.3/conv/conv.0/Conv_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv2/conv2.3/conv/conv.0/Conv"](%/conv2/conv2.2/conv/conv.6/Conv_output_0, %onnx::Conv_497, %onnx::Conv_498), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.3/conv/conv.2/Relu_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv2/conv2.3/conv/conv.2/Relu"](%/conv2/conv2.3/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv2/conv2.3/conv/conv.3/Conv_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=144, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv2/conv2.3/conv/conv.3/Conv"](%/conv2/conv2.3/conv/conv.2/Relu_output_0, %onnx::Conv_500, %onnx::Conv_501), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.3/conv/conv.5/Relu_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv2/conv2.3/conv/conv.5/Relu"](%/conv2/conv2.3/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv2/conv2.3/conv/conv.6/Conv_output_0 : Float(1, 24, 256, 256, strides=[1572864, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv2/conv2.3/conv/conv.6/Conv"](%/conv2/conv2.3/conv/conv.5/Relu_output_0, %onnx::Conv_503, %onnx::Conv_504), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv2/conv2.3/Add_output_0 : Float(1, 24, 256, 256, strides=[1572864, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv2/conv2.3/Add"](%/conv2/conv2.2/conv/conv.6/Conv_output_0, %/conv2/conv2.3/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv2/m2unet.InvertedResidual::conv2.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv3/conv3.4/conv/conv.0/Conv_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.4/conv/conv.0/Conv"](%/conv2/conv2.3/Add_output_0, %onnx::Conv_506, %onnx::Conv_507), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.4/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.4/conv/conv.2/Relu_output_0 : Float(1, 144, 256, 256, strides=[9437184, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.4/conv/conv.2/Relu"](%/conv3/conv3.4/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.4/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.4/conv/conv.3/Conv_output_0 : Float(1, 144, 128, 128, strides=[2359296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=144, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2], onnx_name="/conv3/conv3.4/conv/conv.3/Conv"](%/conv3/conv3.4/conv/conv.2/Relu_output_0, %onnx::Conv_509, %onnx::Conv_510), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.4/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.4/conv/conv.5/Relu_output_0 : Float(1, 144, 128, 128, strides=[2359296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.4/conv/conv.5/Relu"](%/conv3/conv3.4/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.4/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.4/conv/conv.6/Conv_output_0 : Float(1, 32, 128, 128, strides=[524288, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.4/conv/conv.6/Conv"](%/conv3/conv3.4/conv/conv.5/Relu_output_0, %onnx::Conv_512, %onnx::Conv_513), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.4/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.5/conv/conv.0/Conv_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.5/conv/conv.0/Conv"](%/conv3/conv3.4/conv/conv.6/Conv_output_0, %onnx::Conv_515, %onnx::Conv_516), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.5/conv/conv.2/Relu_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.5/conv/conv.2/Relu"](%/conv3/conv3.5/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.5/conv/conv.3/Conv_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=192, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv3/conv3.5/conv/conv.3/Conv"](%/conv3/conv3.5/conv/conv.2/Relu_output_0, %onnx::Conv_518, %onnx::Conv_519), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.5/conv/conv.5/Relu_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.5/conv/conv.5/Relu"](%/conv3/conv3.5/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.5/conv/conv.6/Conv_output_0 : Float(1, 32, 128, 128, strides=[524288, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.5/conv/conv.6/Conv"](%/conv3/conv3.5/conv/conv.5/Relu_output_0, %onnx::Conv_521, %onnx::Conv_522), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.5/Add_output_0 : Float(1, 32, 128, 128, strides=[524288, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv3/conv3.5/Add"](%/conv3/conv3.4/conv/conv.6/Conv_output_0, %/conv3/conv3.5/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv3/conv3.6/conv/conv.0/Conv_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.6/conv/conv.0/Conv"](%/conv3/conv3.5/Add_output_0, %onnx::Conv_524, %onnx::Conv_525), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.6/conv/conv.2/Relu_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.6/conv/conv.2/Relu"](%/conv3/conv3.6/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.6/conv/conv.3/Conv_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=192, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv3/conv3.6/conv/conv.3/Conv"](%/conv3/conv3.6/conv/conv.2/Relu_output_0, %onnx::Conv_527, %onnx::Conv_528), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.6/conv/conv.5/Relu_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv3/conv3.6/conv/conv.5/Relu"](%/conv3/conv3.6/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv3/conv3.6/conv/conv.6/Conv_output_0 : Float(1, 32, 128, 128, strides=[524288, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv3/conv3.6/conv/conv.6/Conv"](%/conv3/conv3.6/conv/conv.5/Relu_output_0, %onnx::Conv_530, %onnx::Conv_531), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv3/conv3.6/Add_output_0 : Float(1, 32, 128, 128, strides=[524288, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv3/conv3.6/Add"](%/conv3/conv3.5/Add_output_0, %/conv3/conv3.6/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv3/m2unet.InvertedResidual::conv3.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv4/conv4.7/conv/conv.0/Conv_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.7/conv/conv.0/Conv"](%/conv3/conv3.6/Add_output_0, %onnx::Conv_533, %onnx::Conv_534), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.7/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.7/conv/conv.2/Relu_output_0 : Float(1, 192, 128, 128, strides=[3145728, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.7/conv/conv.2/Relu"](%/conv4/conv4.7/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.7/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.7/conv/conv.3/Conv_output_0 : Float(1, 192, 64, 64, strides=[786432, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=192, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2], onnx_name="/conv4/conv4.7/conv/conv.3/Conv"](%/conv4/conv4.7/conv/conv.2/Relu_output_0, %onnx::Conv_536, %onnx::Conv_537), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.7/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.7/conv/conv.5/Relu_output_0 : Float(1, 192, 64, 64, strides=[786432, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.7/conv/conv.5/Relu"](%/conv4/conv4.7/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.7/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.7/conv/conv.6/Conv_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.7/conv/conv.6/Conv"](%/conv4/conv4.7/conv/conv.5/Relu_output_0, %onnx::Conv_539, %onnx::Conv_540), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.7/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.8/conv/conv.0/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.8/conv/conv.0/Conv"](%/conv4/conv4.7/conv/conv.6/Conv_output_0, %onnx::Conv_542, %onnx::Conv_543), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.8/conv/conv.2/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.8/conv/conv.2/Relu"](%/conv4/conv4.8/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.8/conv/conv.3/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=384, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.8/conv/conv.3/Conv"](%/conv4/conv4.8/conv/conv.2/Relu_output_0, %onnx::Conv_545, %onnx::Conv_546), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.8/conv/conv.5/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.8/conv/conv.5/Relu"](%/conv4/conv4.8/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.8/conv/conv.6/Conv_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.8/conv/conv.6/Conv"](%/conv4/conv4.8/conv/conv.5/Relu_output_0, %onnx::Conv_548, %onnx::Conv_549), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.8/Add_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv4/conv4.8/Add"](%/conv4/conv4.7/conv/conv.6/Conv_output_0, %/conv4/conv4.8/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.8 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv4/conv4.9/conv/conv.0/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.9/conv/conv.0/Conv"](%/conv4/conv4.8/Add_output_0, %onnx::Conv_551, %onnx::Conv_552), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.9/conv/conv.2/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.9/conv/conv.2/Relu"](%/conv4/conv4.9/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.9/conv/conv.3/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=384, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.9/conv/conv.3/Conv"](%/conv4/conv4.9/conv/conv.2/Relu_output_0, %onnx::Conv_554, %onnx::Conv_555), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.9/conv/conv.5/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.9/conv/conv.5/Relu"](%/conv4/conv4.9/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.9/conv/conv.6/Conv_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.9/conv/conv.6/Conv"](%/conv4/conv4.9/conv/conv.5/Relu_output_0, %onnx::Conv_557, %onnx::Conv_558), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.9/Add_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv4/conv4.9/Add"](%/conv4/conv4.8/Add_output_0, %/conv4/conv4.9/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.9 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv4/conv4.10/conv/conv.0/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.10/conv/conv.0/Conv"](%/conv4/conv4.9/Add_output_0, %onnx::Conv_560, %onnx::Conv_561), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.10/conv/conv.2/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.10/conv/conv.2/Relu"](%/conv4/conv4.10/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.10/conv/conv.3/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=384, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.10/conv/conv.3/Conv"](%/conv4/conv4.10/conv/conv.2/Relu_output_0, %onnx::Conv_563, %onnx::Conv_564), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.10/conv/conv.5/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.10/conv/conv.5/Relu"](%/conv4/conv4.10/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.10/conv/conv.6/Conv_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.10/conv/conv.6/Conv"](%/conv4/conv4.10/conv/conv.5/Relu_output_0, %onnx::Conv_566, %onnx::Conv_567), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.10/Add_output_0 : Float(1, 64, 64, 64, strides=[262144, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv4/conv4.10/Add"](%/conv4/conv4.9/Add_output_0, %/conv4/conv4.10/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.10 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv4/conv4.11/conv/conv.0/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.11/conv/conv.0/Conv"](%/conv4/conv4.10/Add_output_0, %onnx::Conv_569, %onnx::Conv_570), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.11/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.11/conv/conv.2/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.11/conv/conv.2/Relu"](%/conv4/conv4.11/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.11/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.11/conv/conv.3/Conv_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=384, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.11/conv/conv.3/Conv"](%/conv4/conv4.11/conv/conv.2/Relu_output_0, %onnx::Conv_572, %onnx::Conv_573), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.11/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.11/conv/conv.5/Relu_output_0 : Float(1, 384, 64, 64, strides=[1572864, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.11/conv/conv.5/Relu"](%/conv4/conv4.11/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.11/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.11/conv/conv.6/Conv_output_0 : Float(1, 96, 64, 64, strides=[393216, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.11/conv/conv.6/Conv"](%/conv4/conv4.11/conv/conv.5/Relu_output_0, %onnx::Conv_575, %onnx::Conv_576), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.11/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.12/conv/conv.0/Conv_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.12/conv/conv.0/Conv"](%/conv4/conv4.11/conv/conv.6/Conv_output_0, %onnx::Conv_578, %onnx::Conv_579), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.12/conv/conv.2/Relu_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.12/conv/conv.2/Relu"](%/conv4/conv4.12/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.12/conv/conv.3/Conv_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=576, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.12/conv/conv.3/Conv"](%/conv4/conv4.12/conv/conv.2/Relu_output_0, %onnx::Conv_581, %onnx::Conv_582), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.12/conv/conv.5/Relu_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.12/conv/conv.5/Relu"](%/conv4/conv4.12/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.12/conv/conv.6/Conv_output_0 : Float(1, 96, 64, 64, strides=[393216, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.12/conv/conv.6/Conv"](%/conv4/conv4.12/conv/conv.5/Relu_output_0, %onnx::Conv_584, %onnx::Conv_585), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.12/Add_output_0 : Float(1, 96, 64, 64, strides=[393216, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv4/conv4.12/Add"](%/conv4/conv4.11/conv/conv.6/Conv_output_0, %/conv4/conv4.12/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.12 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/conv4/conv4.13/conv/conv.0/Conv_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.13/conv/conv.0/Conv"](%/conv4/conv4.12/Add_output_0, %onnx::Conv_587, %onnx::Conv_588), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.13/conv/conv.2/Relu_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.13/conv/conv.2/Relu"](%/conv4/conv4.13/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.13/conv/conv.3/Conv_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=576, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/conv4/conv4.13/conv/conv.3/Conv"](%/conv4/conv4.13/conv/conv.2/Relu_output_0, %onnx::Conv_590, %onnx::Conv_591), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.13/conv/conv.5/Relu_output_0 : Float(1, 576, 64, 64, strides=[2359296, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/conv4/conv4.13/conv/conv.5/Relu"](%/conv4/conv4.13/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/conv4/conv4.13/conv/conv.6/Conv_output_0 : Float(1, 96, 64, 64, strides=[393216, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv4/conv4.13/conv/conv.6/Conv"](%/conv4/conv4.13/conv/conv.5/Relu_output_0, %onnx::Conv_593, %onnx::Conv_594), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/conv4/conv4.13/Add_output_0 : Float(1, 96, 64, 64, strides=[393216, 4096, 64, 1], requires_grad=1, device=cuda:0) = onnx::Add[onnx_name="/conv4/conv4.13/Add"](%/conv4/conv4.12/Add_output_0, %/conv4/conv4.13/conv/conv.6/Conv_output_0), scope: m2unet.M2UNet::/torch.nn.modules.container.Sequential::conv4/m2unet.InvertedResidual::conv4.13 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:45:0
%/decode4/upsample/Constant_output_0 : Float(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 1 2 2 [ CPUFloatType{4} ], onnx_name="/decode4/upsample/Constant"](), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%onnx::Resize_424 : Tensor? = prim::Constant(), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode4/upsample/Resize_output_0 : Float(1, 96, 128, 128, strides=[1572864, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Resize[coordinate_transformation_mode="half_pixel", cubic_coeff_a=-0.75, mode="linear", nearest_mode="floor", onnx_name="/decode4/upsample/Resize"](%/conv4/conv4.13/Add_output_0, %onnx::Resize_424, %/decode4/upsample/Constant_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode4/Concat_output_0 : Float(1, 128, 128, 128, strides=[2097152, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Concat[axis=1, onnx_name="/decode4/Concat"](%/decode4/upsample/Resize_output_0, %/conv3/conv3.6/Add_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:71:0
%/decode4/ir1/conv/conv.0/Conv_output_0 : Float(1, 19, 128, 128, strides=[311296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode4/ir1/conv/conv.0/Conv"](%/decode4/Concat_output_0, %onnx::Conv_596, %onnx::Conv_597), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode4/ir1/conv/conv.2/Relu_output_0 : Float(1, 19, 128, 128, strides=[311296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode4/ir1/conv/conv.2/Relu"](%/decode4/ir1/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode4/ir1/conv/conv.3/Conv_output_0 : Float(1, 19, 128, 128, strides=[311296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=19, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/decode4/ir1/conv/conv.3/Conv"](%/decode4/ir1/conv/conv.2/Relu_output_0, %onnx::Conv_599, %onnx::Conv_600), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode4/ir1/conv/conv.5/Relu_output_0 : Float(1, 19, 128, 128, strides=[311296, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode4/ir1/conv/conv.5/Relu"](%/decode4/ir1/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode4/ir1/conv/conv.6/Conv_output_0 : Float(1, 64, 128, 128, strides=[1048576, 16384, 128, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode4/ir1/conv/conv.6/Conv"](%/decode4/ir1/conv/conv.5/Relu_output_0, %onnx::Conv_602, %onnx::Conv_603), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode4/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode3/upsample/Constant_output_0 : Float(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 1 2 2 [ CPUFloatType{4} ], onnx_name="/decode3/upsample/Constant"](), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%onnx::Resize_438 : Tensor? = prim::Constant(), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode3/upsample/Resize_output_0 : Float(1, 64, 256, 256, strides=[4194304, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Resize[coordinate_transformation_mode="half_pixel", cubic_coeff_a=-0.75, mode="linear", nearest_mode="floor", onnx_name="/decode3/upsample/Resize"](%/decode4/ir1/conv/conv.6/Conv_output_0, %onnx::Resize_438, %/decode3/upsample/Constant_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode3/Concat_output_0 : Float(1, 88, 256, 256, strides=[5767168, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Concat[axis=1, onnx_name="/decode3/Concat"](%/decode3/upsample/Resize_output_0, %/conv2/conv2.3/Add_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:71:0
%/decode3/ir1/conv/conv.0/Conv_output_0 : Float(1, 13, 256, 256, strides=[851968, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode3/ir1/conv/conv.0/Conv"](%/decode3/Concat_output_0, %onnx::Conv_605, %onnx::Conv_606), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode3/ir1/conv/conv.2/Relu_output_0 : Float(1, 13, 256, 256, strides=[851968, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode3/ir1/conv/conv.2/Relu"](%/decode3/ir1/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode3/ir1/conv/conv.3/Conv_output_0 : Float(1, 13, 256, 256, strides=[851968, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=13, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/decode3/ir1/conv/conv.3/Conv"](%/decode3/ir1/conv/conv.2/Relu_output_0, %onnx::Conv_608, %onnx::Conv_609), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode3/ir1/conv/conv.5/Relu_output_0 : Float(1, 13, 256, 256, strides=[851968, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode3/ir1/conv/conv.5/Relu"](%/decode3/ir1/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode3/ir1/conv/conv.6/Conv_output_0 : Float(1, 44, 256, 256, strides=[2883584, 65536, 256, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode3/ir1/conv/conv.6/Conv"](%/decode3/ir1/conv/conv.5/Relu_output_0, %onnx::Conv_611, %onnx::Conv_612), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode3/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode2/upsample/Constant_output_0 : Float(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 1 2 2 [ CPUFloatType{4} ], onnx_name="/decode2/upsample/Constant"](), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%onnx::Resize_452 : Tensor? = prim::Constant(), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode2/upsample/Resize_output_0 : Float(1, 44, 512, 512, strides=[11534336, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Resize[coordinate_transformation_mode="half_pixel", cubic_coeff_a=-0.75, mode="linear", nearest_mode="floor", onnx_name="/decode2/upsample/Resize"](%/decode3/ir1/conv/conv.6/Conv_output_0, %onnx::Resize_452, %/decode2/upsample/Constant_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode2/Concat_output_0 : Float(1, 60, 512, 512, strides=[15728640, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Concat[axis=1, onnx_name="/decode2/Concat"](%/decode2/upsample/Resize_output_0, %/conv1/conv1.1/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:71:0
%/decode2/ir1/conv/conv.0/Conv_output_0 : Float(1, 9, 512, 512, strides=[2359296, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode2/ir1/conv/conv.0/Conv"](%/decode2/Concat_output_0, %onnx::Conv_614, %onnx::Conv_615), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode2/ir1/conv/conv.2/Relu_output_0 : Float(1, 9, 512, 512, strides=[2359296, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode2/ir1/conv/conv.2/Relu"](%/decode2/ir1/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode2/ir1/conv/conv.3/Conv_output_0 : Float(1, 9, 512, 512, strides=[2359296, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=9, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/decode2/ir1/conv/conv.3/Conv"](%/decode2/ir1/conv/conv.2/Relu_output_0, %onnx::Conv_617, %onnx::Conv_618), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode2/ir1/conv/conv.5/Relu_output_0 : Float(1, 9, 512, 512, strides=[2359296, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode2/ir1/conv/conv.5/Relu"](%/decode2/ir1/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode2/ir1/conv/conv.6/Conv_output_0 : Float(1, 30, 512, 512, strides=[7864320, 262144, 512, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode2/ir1/conv/conv.6/Conv"](%/decode2/ir1/conv/conv.5/Relu_output_0, %onnx::Conv_620, %onnx::Conv_621), scope: m2unet.M2UNet::/m2unet.DecoderBlock::decode2/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode1/upsample/Constant_output_0 : Float(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 1 2 2 [ CPUFloatType{4} ], onnx_name="/decode1/upsample/Constant"](), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%onnx::Resize_466 : Tensor? = prim::Constant(), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode1/upsample/Resize_output_0 : Float(1, 30, 1024, 1024, strides=[31457280, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Resize[coordinate_transformation_mode="half_pixel", cubic_coeff_a=-0.75, mode="linear", nearest_mode="floor", onnx_name="/decode1/upsample/Resize"](%/decode2/ir1/conv/conv.6/Conv_output_0, %onnx::Resize_466, %/decode1/upsample/Constant_output_0), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/torch.nn.modules.upsampling.Upsample::upsample # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:3919:0
%/decode1/Concat_output_0 : Float(1, 31, 1024, 1024, strides=[32505856, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Concat[axis=1, onnx_name="/decode1/Concat"](%/decode1/upsample/Resize_output_0, %input.1), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/ao/nn/quantized/modules/functional_modules.py:71:0
%/decode1/ir1/conv/conv.0/Conv_output_0 : Float(1, 5, 1024, 1024, strides=[5242880, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode1/ir1/conv/conv.0/Conv"](%/decode1/Concat_output_0, %onnx::Conv_623, %onnx::Conv_624), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode1/ir1/conv/conv.2/Relu_output_0 : Float(1, 5, 1024, 1024, strides=[5242880, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode1/ir1/conv/conv.2/Relu"](%/decode1/ir1/conv/conv.0/Conv_output_0), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.2 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode1/ir1/conv/conv.3/Conv_output_0 : Float(1, 5, 1024, 1024, strides=[5242880, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=5, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/decode1/ir1/conv/conv.3/Conv"](%/decode1/ir1/conv/conv.2/Relu_output_0, %onnx::Conv_626, %onnx::Conv_627), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.3 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%/decode1/ir1/conv/conv.5/Relu_output_0 : Float(1, 5, 1024, 1024, strides=[5242880, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Relu[onnx_name="/decode1/ir1/conv/conv.5/Relu"](%/decode1/ir1/conv/conv.3/Conv_output_0), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.activation.ReLU::conv.5 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
%/decode1/ir1/conv/conv.6/Conv_output_0 : Float(1, 16, 1024, 1024, strides=[16777216, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode1/ir1/conv/conv.6/Conv"](%/decode1/ir1/conv/conv.5/Relu_output_0, %onnx::Conv_629, %onnx::Conv_630), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/m2unet.InvertedResidual::ir1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.6 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
%477 : Float(1, 1, 1024, 1024, strides=[1048576, 1048576, 1024, 1], requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/decode1/conv/conv.0/Conv"](%/decode1/ir1/conv/conv.6/Conv_output_0, %decode1.conv.0.weight, %decode1.conv.0.bias), scope: m2unet.M2UNet::/m2unet.LastDecoderBlock::decode1/torch.nn.modules.container.Sequential::conv/torch.nn.modules.conv.Conv2d::conv.0 # /home/prakashlab/miniconda3/envs/tensorRT/lib/python3.10/site-packages/torch/nn/modules/conv.py:458:0
return (%477)
Despite these warnings, the export did produce an ONNX file.
I then ran the following command on my Jetson Xavier NX to convert the ONNX model to a TensorRT engine:
trtexec --onnx=models/flat_model_56_10.onnx --saveEngine=models/flat_model_56_10_from_onnx.trt --explicitBatch
This produced the following logs:
&&&& RUNNING TensorRT.trtexec [TensorRT v8401] # trtexec --onnx=models/flat_model_56_10.onnx --saveEngine=models/flat_model_56_10_from_onnx.trt --explicitBatch
[03/16/2023-15:48:13] [W] --explicitBatch flag has been deprecated and has no effect!
[03/16/2023-15:48:13] [W] Explicit batch dim is automatically enabled if input model is ONNX or if dynamic shapes are provided when the engine is built.
[03/16/2023-15:48:13] [I] === Model Options ===
[03/16/2023-15:48:13] [I] Format: ONNX
[03/16/2023-15:48:13] [I] Model: /media/prakashlab/SSD/quantize_m2unet/models/flat_model_56_10.onnx
[03/16/2023-15:48:13] [I] Output:
[03/16/2023-15:48:13] [I] === Build Options ===
[03/16/2023-15:48:13] [I] Max batch: explicit batch
[03/16/2023-15:48:13] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[03/16/2023-15:48:13] [I] minTiming: 1
[03/16/2023-15:48:13] [I] avgTiming: 8
[03/16/2023-15:48:13] [I] Precision: FP32
[03/16/2023-15:48:13] [I] LayerPrecisions:
[03/16/2023-15:48:13] [I] Calibration:
[03/16/2023-15:48:13] [I] Refit: Disabled
[03/16/2023-15:48:13] [I] Sparsity: Disabled
[03/16/2023-15:48:13] [I] Safe mode: Disabled
[03/16/2023-15:48:13] [I] DirectIO mode: Disabled
[03/16/2023-15:48:13] [I] Restricted mode: Disabled
[03/16/2023-15:48:13] [I] Build only: Disabled
[03/16/2023-15:48:13] [I] Save engine: /media/prakashlab/SSD/quantize_m2unet/models/flat_model_56_10_from_onnx.trt
[03/16/2023-15:48:13] [I] Load engine:
[03/16/2023-15:48:13] [I] Profiling verbosity: 0
[03/16/2023-15:48:13] [I] Tactic sources: Using default tactic sources
[03/16/2023-15:48:13] [I] timingCacheMode: local
[03/16/2023-15:48:13] [I] timingCacheFile:
[03/16/2023-15:48:13] [I] Input(s)s format: fp32:CHW
[03/16/2023-15:48:13] [I] Output(s)s format: fp32:CHW
[03/16/2023-15:48:13] [I] Input build shapes: model
[03/16/2023-15:48:13] [I] Input calibration shapes: model
[03/16/2023-15:48:13] [I] === System Options ===
[03/16/2023-15:48:13] [I] Device: 0
[03/16/2023-15:48:13] [I] DLACore:
[03/16/2023-15:48:13] [I] Plugins:
[03/16/2023-15:48:13] [I] === Inference Options ===
[03/16/2023-15:48:13] [I] Batch: Explicit
[03/16/2023-15:48:13] [I] Input inference shapes: model
[03/16/2023-15:48:13] [I] Iterations: 10
[03/16/2023-15:48:13] [I] Duration: 3s (+ 200ms warm up)
[03/16/2023-15:48:13] [I] Sleep time: 0ms
[03/16/2023-15:48:13] [I] Idle time: 0ms
[03/16/2023-15:48:13] [I] Streams: 1
[03/16/2023-15:48:13] [I] ExposeDMA: Disabled
[03/16/2023-15:48:13] [I] Data transfers: Enabled
[03/16/2023-15:48:13] [I] Spin-wait: Disabled
[03/16/2023-15:48:13] [I] Multithreading: Disabled
[03/16/2023-15:48:13] [I] CUDA Graph: Disabled
[03/16/2023-15:48:13] [I] Separate profiling: Disabled
[03/16/2023-15:48:13] [I] Time Deserialize: Disabled
[03/16/2023-15:48:13] [I] Time Refit: Disabled
[03/16/2023-15:48:13] [I] Inputs:
[03/16/2023-15:48:13] [I] === Reporting Options ===
[03/16/2023-15:48:13] [I] Verbose: Disabled
[03/16/2023-15:48:13] [I] Averages: 10 inferences
[03/16/2023-15:48:13] [I] Percentile: 99
[03/16/2023-15:48:13] [I] Dump refittable layers:Disabled
[03/16/2023-15:48:13] [I] Dump output: Disabled
[03/16/2023-15:48:13] [I] Profile: Disabled
[03/16/2023-15:48:13] [I] Export timing to JSON file:
[03/16/2023-15:48:13] [I] Export output to JSON file:
[03/16/2023-15:48:13] [I] Export profile to JSON file:
[03/16/2023-15:48:13] [I]
[03/16/2023-15:48:14] [I] === Device Information ===
[03/16/2023-15:48:14] [I] Selected Device: Xavier
[03/16/2023-15:48:14] [I] Compute Capability: 7.2
[03/16/2023-15:48:14] [I] SMs: 6
[03/16/2023-15:48:14] [I] Compute Clock Rate: 1.109 GHz
[03/16/2023-15:48:14] [I] Device Global Memory: 6846 MiB
[03/16/2023-15:48:14] [I] Shared Memory per SM: 96 KiB
[03/16/2023-15:48:14] [I] Memory Bus Width: 256 bits (ECC disabled)
[03/16/2023-15:48:14] [I] Memory Clock Rate: 1.109 GHz
[03/16/2023-15:48:14] [I]
[03/16/2023-15:48:14] [I] TensorRT version: 8.4.1
[03/16/2023-15:48:15] [I] [TRT] [MemUsageChange] Init CUDA: CPU +185, GPU +0, now: CPU 209, GPU 3118 (MiB)
[03/16/2023-15:48:19] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +131, GPU +124, now: CPU 359, GPU 3255 (MiB)
[03/16/2023-15:48:19] [I] Start parsing network model
[03/16/2023-15:48:19] [I] [TRT] ----------------------------------------------------------------
[03/16/2023-15:48:19] [I] [TRT] Input filename: /media/prakashlab/SSD/quantize_m2unet/models/flat_model_56_10.onnx
[03/16/2023-15:48:19] [I] [TRT] ONNX IR version: 0.0.7
[03/16/2023-15:48:19] [I] [TRT] Opset version: 12
[03/16/2023-15:48:19] [I] [TRT] Producer name: pytorch
[03/16/2023-15:48:19] [I] [TRT] Producer version: 1.13.0
[03/16/2023-15:48:19] [I] [TRT] Domain:
[03/16/2023-15:48:19] [I] [TRT] Model version: 0
[03/16/2023-15:48:19] [I] [TRT] Doc string:
[03/16/2023-15:48:19] [I] [TRT] ----------------------------------------------------------------
[03/16/2023-15:48:19] [I] Finish parsing network model
[03/16/2023-15:48:20] [I] [TRT] ---------- Layers Running on DLA ----------
[03/16/2023-15:48:20] [I] [TRT] ---------- Layers Running on GPU ----------
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: Reformatting CopyNode for Network Input input.1
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv1/conv1.0/conv1.0.0/Conv + /conv1/conv1.0/conv1.0.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv1/conv1.1/conv/conv.0/Conv + /conv1/conv1.1/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv1/conv1.1/conv/conv.3/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.2/conv/conv.0/Conv + /conv2/conv2.2/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.2/conv/conv.3/Conv + /conv2/conv2.2/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.2/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.3/conv/conv.0/Conv + /conv2/conv2.3/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.3/conv/conv.3/Conv + /conv2/conv2.3/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv2/conv2.3/conv/conv.6/Conv + /conv2/conv2.3/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.4/conv/conv.0/Conv + /conv3/conv3.4/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.4/conv/conv.3/Conv + /conv3/conv3.4/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.4/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.5/conv/conv.0/Conv + /conv3/conv3.5/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.5/conv/conv.3/Conv + /conv3/conv3.5/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.5/conv/conv.6/Conv + /conv3/conv3.5/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.6/conv/conv.0/Conv + /conv3/conv3.6/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.6/conv/conv.3/Conv + /conv3/conv3.6/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv3/conv3.6/conv/conv.6/Conv + /conv3/conv3.6/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.7/conv/conv.0/Conv + /conv4/conv4.7/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.7/conv/conv.3/Conv + /conv4/conv4.7/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.7/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.8/conv/conv.0/Conv + /conv4/conv4.8/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.8/conv/conv.3/Conv + /conv4/conv4.8/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.8/conv/conv.6/Conv + /conv4/conv4.8/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.9/conv/conv.0/Conv + /conv4/conv4.9/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.9/conv/conv.3/Conv + /conv4/conv4.9/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.9/conv/conv.6/Conv + /conv4/conv4.9/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.10/conv/conv.0/Conv + /conv4/conv4.10/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.10/conv/conv.3/Conv + /conv4/conv4.10/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.10/conv/conv.6/Conv + /conv4/conv4.10/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.11/conv/conv.0/Conv + /conv4/conv4.11/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.11/conv/conv.3/Conv + /conv4/conv4.11/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.11/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.12/conv/conv.0/Conv + /conv4/conv4.12/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.12/conv/conv.3/Conv + /conv4/conv4.12/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.12/conv/conv.6/Conv + /conv4/conv4.12/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.13/conv/conv.0/Conv + /conv4/conv4.13/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.13/conv/conv.3/Conv + /conv4/conv4.13/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /conv4/conv4.13/conv/conv.6/Conv + /conv4/conv4.13/Add
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] RESIZE: /decode4/upsample/Resize
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /decode4/upsample/Resize_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /conv3/conv3.6/Add_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode4/ir1/conv/conv.0/Conv + /decode4/ir1/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode4/ir1/conv/conv.3/Conv + /decode4/ir1/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode4/ir1/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] RESIZE: /decode3/upsample/Resize
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /decode3/upsample/Resize_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /conv2/conv2.3/Add_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode3/ir1/conv/conv.0/Conv + /decode3/ir1/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode3/ir1/conv/conv.3/Conv + /decode3/ir1/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode3/ir1/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] RESIZE: /decode2/upsample/Resize
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /decode2/upsample/Resize_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode2/ir1/conv/conv.0/Conv + /decode2/ir1/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode2/ir1/conv/conv.3/Conv + /decode2/ir1/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode2/ir1/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] RESIZE: /decode1/upsample/Resize
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: /decode1/upsample/Resize_output_0 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] COPY: input.1 copy
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode1/ir1/conv/conv.0/Conv + /decode1/ir1/conv/conv.2/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode1/ir1/conv/conv.3/Conv + /decode1/ir1/conv/conv.5/Relu
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode1/ir1/conv/conv.6/Conv
[03/16/2023-15:48:20] [I] [TRT] [GpuLayer] CONVOLUTION: /decode1/conv/conv.0/Conv
[03/16/2023-15:48:22] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +260, GPU +241, now: CPU 622, GPU 3503 (MiB)
[03/16/2023-15:48:23] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +86, GPU +82, now: CPU 708, GPU 3585 (MiB)
[03/16/2023-15:48:23] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
[03/16/2023-15:53:35] [I] [TRT] Detected 1 inputs and 1 output network tensors.
[03/16/2023-15:53:35] [I] [TRT] Total Host Persistent Memory: 84288
[03/16/2023-15:53:35] [I] [TRT] Total Device Persistent Memory: 23929856
[03/16/2023-15:53:35] [I] [TRT] Total Scratch Memory: 4194304
[03/16/2023-15:53:35] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 0 MiB, GPU 1866 MiB
[03/16/2023-15:53:35] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 17.6213ms to assign 8 blocks to 60 nodes requiring 309854212 bytes.
[03/16/2023-15:53:35] [I] [TRT] Total Activation Memory: 309854212
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 973, GPU 4423 (MiB)
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +0, GPU +26, now: CPU 0, GPU 26 (MiB)
[03/16/2023-15:53:35] [W] [TRT] The getMaxBatchSize() function should not be used with an engine built from a network created with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. This function will always return 1.
[03/16/2023-15:53:35] [W] [TRT] The getMaxBatchSize() function should not be used with an engine built from a network created with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. This function will always return 1.
[03/16/2023-15:53:35] [I] Engine built in 321.884 sec.
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 841, GPU 4379 (MiB)
[03/16/2023-15:53:35] [I] [TRT] Loaded engine size: 2 MiB
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 841, GPU 4379 (MiB)
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +24, now: CPU 0, GPU 24 (MiB)
[03/16/2023-15:53:35] [I] Engine deserialized in 0.0209206 sec.
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 841, GPU 4379 (MiB)
[03/16/2023-15:53:35] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +319, now: CPU 0, GPU 343 (MiB)
[03/16/2023-15:53:35] [I] Using random values for input input.1
[03/16/2023-15:53:36] [I] Created input binding for input.1 with dimensions 1x1x1024x1024
[03/16/2023-15:53:36] [I] Using random values for output 477
[03/16/2023-15:53:36] [I] Created output binding for 477 with dimensions 1x1x1024x1024
[03/16/2023-15:53:36] [I] Starting inference
[03/16/2023-15:53:39] [I] Warmup completed 4 queries over 200 ms
[03/16/2023-15:53:39] [I] Timing trace has 51 queries over 3.16491 s
[03/16/2023-15:53:39] [I]
[03/16/2023-15:53:39] [I] === Trace details ===
[03/16/2023-15:53:39] [I] Trace averages of 10 runs:
[03/16/2023-15:53:39] [I] Average on 10 runs - GPU latency: 60.9328 ms - Host latency: 61.633 ms (enqueue 1.44854 ms)
[03/16/2023-15:53:39] [I] Average on 10 runs - GPU latency: 60.8995 ms - Host latency: 61.5924 ms (enqueue 1.40194 ms)
[03/16/2023-15:53:39] [I] Average on 10 runs - GPU latency: 60.8387 ms - Host latency: 61.5485 ms (enqueue 1.43955 ms)
[03/16/2023-15:53:39] [I] Average on 10 runs - GPU latency: 60.8859 ms - Host latency: 61.5922 ms (enqueue 1.3922 ms)
[03/16/2023-15:53:39] [I] Average on 10 runs - GPU latency: 60.8665 ms - Host latency: 61.5735 ms (enqueue 1.39307 ms)
[03/16/2023-15:53:39] [I]
[03/16/2023-15:53:39] [I] === Performance summary ===
[03/16/2023-15:53:39] [I] Throughput: 16.1142 qps
[03/16/2023-15:53:39] [I] Latency: min = 60.8 ms, max = 63.9493 ms, mean = 61.5725 ms, median = 61.3909 ms, percentile(99%) = 63.9493 ms
[03/16/2023-15:53:39] [I] Enqueue Time: min = 1.23022 ms, max = 1.6532 ms, mean = 1.41308 ms, median = 1.39172 ms, percentile(99%) = 1.6532 ms
[03/16/2023-15:53:39] [I] H2D Latency: min = 0.286621 ms, max = 0.343994 ms, mean = 0.299214 ms, median = 0.295532 ms, percentile(99%) = 0.343994 ms
[03/16/2023-15:53:39] [I] GPU Compute Time: min = 60.3404 ms, max = 63.2629 ms, mean = 60.8743 ms, median = 60.6898 ms, percentile(99%) = 63.2629 ms
[03/16/2023-15:53:39] [I] D2H Latency: min = 0.156006 ms, max = 0.428345 ms, mean = 0.398971 ms, median = 0.402588 ms, percentile(99%) = 0.428345 ms
[03/16/2023-15:53:39] [I] Total Host Walltime: 3.16491 s
[03/16/2023-15:53:39] [I] Total GPU Compute Time: 3.10459 s
[03/16/2023-15:53:39] [W] * GPU compute time is unstable, with coefficient of variance = 1.23481%.
[03/16/2023-15:53:39] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
[03/16/2023-15:53:39] [I] Explanations of the performance metrics are printed in the verbose logs.
[03/16/2023-15:53:39] [I]
&&&& PASSED TensorRT.trtexec [TensorRT v8401] # trtexec --onnx=models/flat_model_56_10.onnx --saveEngine=models/flat_model_56_10_from_onnx.trt --explicitBatch
I then tried running the converted model in Python, loading it through torch2trt's TRTModule with the following code:
import torch
import torch2trt
import numpy as np
def load_trt_model(path):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_trt = torch2trt.TRTModule()
model_trt.load_state_dict(torch.load(path))
return device, model_trt
def process_image_for_trt(image, device):
'''
Takes a monochrome image (x, y)
and returns an object with the correct data format
for running inference.
'''
x = np.expand_dims(image, 0)
x = np.expand_dims(x, 0)
x = torch.from_numpy(x).float()
x = x.to(device)
return x
def inference_on_image(model, image, device):
'''
Take an image and model and return the inference result
as a np array
'''
im = process_image_for_trt(image, device)
out = model(im)[0].detach().cpu().float().numpy()
return out
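For reference, the helpers above are used roughly like this; the OpenCV loader and both paths are illustrative placeholders rather than my exact driver script:
import cv2  # illustrative only - any grayscale image loader works
# placeholder paths: the engine saved by trtexec above and an arbitrary test image
trt_path = "models/flat_model_56_10_from_onnx.trt"
image_path = "example_input.png"
device, model_trt = load_trt_model(trt_path)
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # monochrome (x, y) array
mask = inference_on_image(model_trt, image, device)  # (1, 1024, 1024) numpy array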
This code produces nonsensical output: instead of a segmentation mask, it returns a distorted, uninformative version of the input. I am not hitting the same error I saw with torch2trt, but I am not sure why this approach is failing either.
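One check that might narrow this down is running the exported ONNX model directly with onnxruntime and comparing it against the PyTorch model on the same input. This is a minimal, untested sketch; onnxruntime itself is an assumption on my part, not something already in my pipeline:
import numpy as np
import onnxruntime as ort
# run the exported ONNX model on a fixed random input
sess = ort.InferenceSession("models/flat_model_56_10.onnx", providers=["CPUExecutionProvider"])
x = np.random.rand(1, 1, 1024, 1024).astype(np.float32)
input_name = sess.get_inputs()[0].name
onnx_out = sess.run(None, {input_name: x})[0]
# compare against the PyTorch model (loaded as in the export script) on the same input:
# torch_out = model(torch.from_numpy(x).cuda()).detach().cpu().numpy()
# print(np.abs(onnx_out - torch_out).max())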