TensorRT fails to parse an ONNX file generated from a PyTorch-based U-Net model on the Nano.

I converted a PyTorch-based U-Net model to ONNX and am feeding that model to ./trtexec for benchmarking.

Below is the code for the U-Net model:

import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvBlock(nn.Module):
    """Two 3x3 convolutions, each followed by ReLU and then batch norm.

    Only one ``BatchNorm2d`` module is registered, and it is applied after
    both convolutions, so the two normalisation steps share the same
    affine parameters and running statistics.

    Args:
        in_channels: Number of channels accepted by the first convolution.
        out_channels: Number of channels produced by both convolutions
            (and normalised by the shared batch-norm layer).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Sub-modules are created in the order batch_norm, conv1, conv2 so
        # that parameter names and registration order stay unchanged.
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv_kwargs)

    def forward(self, x):
        """Run conv -> ReLU -> batch norm twice and return the result."""
        for conv in (self.conv1, self.conv2):
            activated = F.relu(conv(x))
            x = self.batch_norm(activated)
        return x


class Unet(nn.Module):
    """U-Net style encoder/decoder assembled from ``ConvBlock`` stages.

    The encoder runs four ``ConvBlock`` + 2x max-pool stages
    (1 -> 32 -> 64 -> 128 -> 256 channels), followed by a 512-channel
    bottleneck block. The decoder mirrors it: each stage upsamples by 2
    (nearest neighbour), concatenates the matching encoder feature map
    along the channel axis, and applies a ``ConvBlock``. A final 1x1
    convolution maps the 32 decoder channels to a single output channel.

    The input is expected to have 1 channel. Because the feature map is
    halved four times and then doubled four times before each
    concatenation, the input height and width need to be multiples of 16
    for the skip connections to line up.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free down/up-sampling layers, registered as
        # pool1..pool4 and then up1..up4.
        for stage in range(1, 5):
            setattr(self, "pool%d" % stage, nn.MaxPool2d(kernel_size=2))
        for stage in range(1, 5):
            setattr(
                self,
                "up%d" % stage,
                nn.Upsample(scale_factor=2, mode='nearest'),
            )

        # Encoder blocks.
        self.conv1 = ConvBlock(1, 32)
        self.conv2 = ConvBlock(32, 64)
        self.conv3 = ConvBlock(64, 128)
        self.conv4 = ConvBlock(128, 256)

        # Bottleneck.
        self.conv5 = ConvBlock(256, 512)

        # Decoder blocks: input channels = upsampled channels + skip channels.
        self.conv6 = ConvBlock(768, 256)   # 512 + 256
        self.conv7 = ConvBlock(384, 128)   # 256 + 128
        self.conv8 = ConvBlock(192, 64)    # 128 + 64
        self.conv9 = ConvBlock(96, 32)     # 64 + 32

        # 1x1 projection to the single output channel.
        self.conv10 = nn.Conv2d(32, 1, 1)

    def forward(self, x):
        """Return the single-channel output map for input batch ``x``."""
        encoder_stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
            (self.conv4, self.pool4),
        )
        decoder_stages = (
            (self.up1, self.conv6),
            (self.up2, self.conv7),
            (self.up3, self.conv8),
            (self.up4, self.conv9),
        )

        # Contracting path: remember each pre-pool feature map as a skip.
        skips = []
        for block, pool in encoder_stages:
            features = block(x)
            skips.append(features)
            x = pool(features)

        x = self.conv5(x)

        # Expanding path: skips are consumed deepest-first (c4, c3, c2, c1).
        for upsample, block in decoder_stages:
            merged = torch.cat([upsample(x), skips.pop()], 1)
            x = block(merged)

        return self.conv10(x)

Error Log:

[I] onnx: U_Net_ONNX.onnx
[I] fp16
----------------------------------------------------------------
Input filename:   U_Net_ONNX.onnx
ONNX IR version:  0.0.4
Opset version:    9
Producer name:    pytorch
Producer version: 1.2
Domain:           
Model version:    0
Doc string:       
----------------------------------------------------------------
WARNING: ONNX model has a newer ir_version (0.0.4) than this parser was built against (0.0.3).
WARNING: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
Successfully casted down to INT32.
While parsing node number 36 [Gather]:
ERROR: onnx2trt_utils.hpp:277 In function convert_axis:
[8] Assertion failed: axis >= 0 && axis < nbDims
[E] failed to parse onnx file
[E] Engine could not be created
[E] Engine could not be created
&&&& FAILED TensorRT.trtexec # ./trtexec --onnx=U_Net_ONNX.onnx --fp16

The conversion was done with torch.onnx.export, using the model above with trained weights.

Kindly let me know how we can fix this.