Segmentation fault occurs after getting the result

Description

A clear and concise description of the bug or issue.

When executing the code, a segmentation fault occurs with error code 1 after the result is obtained.

This is the log:

Writing output image to file output.png
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
Segmentation fault (core dumped)

Environment

I used Docker.

Docker image: nvcr.io/nvidia/tensorrt:22.12-py3

Relevant Files

inference.py

import numpy as np
import os
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

import matplotlib.pyplot as plt
from PIL import Image

# Logger instance handed to the TensorRT runtime when the engine is loaded.
TRT_LOGGER = trt.Logger()

# Paths of the serialized engine, the input image and the output image.
engine_file = "fcn-resnet101.engine"
input_file = "input.ppm"
output_file = "output.png"

def preprocess(image):
    """Normalize an image and return it as a channel-first float32 array.

    Args:
        image: Object with a ``convert('RGB')`` method whose result can be
            turned into an array by ``np.asarray`` (e.g. a PIL image).

    Returns:
        Array with the third axis moved to the front, scaled to [0, 1] and
        then normalized per channel with the fixed mean/stddev below.
    """
    # Mean and standard deviation applied per RGB channel.
    image = image.convert('RGB')
    mean = np.array([0.485, 0.456, 0.406]).astype('float32')
    stddev = np.array([0.229, 0.224, 0.225]).astype('float32')
    data = (np.asarray(image).astype('float32') / float(255.0) - mean) / stddev
    # Switch from HWC to CHW order.
    return np.moveaxis(data, 2, 0)

def postprocess(data):
    """Turn a class-index map into a palettized PIL image.

    Args:
        data: Array of class indices; it is cast to uint8 before being
            wrapped as a mode ``'P'`` image.

    Returns:
        The PIL image with a 21-entry colour palette attached.
    """
    num_classes = 21
    # Build one colour per class by scaling a fixed base triple.
    palette = np.array([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    colors = np.array([palette * i % 255 for i in range(num_classes)]).astype("uint8")
    # Attach the palette to the index image.
    img = Image.fromarray(data.astype('uint8'), mode='P')
    img.putpalette(colors)
    return img

def load_engine(engine_file_path):
    """Deserialize a TensorRT engine from a file.

    Args:
        engine_file_path: Path to the serialized engine; must exist.

    Returns:
        The result of ``runtime.deserialize_cuda_engine`` on the file bytes.
    """
    assert os.path.exists(engine_file_path)
    print("Reading engine from file {}".format(engine_file_path))
    # NOTE(review): the Runtime's ``with`` block exits when this function
    # returns, while the caller keeps using the returned engine. Confirm in
    # the TensorRT documentation that the engine may outlive the runtime
    # context manager; object lifetime/teardown order is a plausible
    # suspect for crashes at exit.
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def infer(engine, input_file, output_file):
    """Run the engine on one image and write the result as a PNG.

    Args:
        engine: TensorRT engine exposing a binding named ``"input"``.
        input_file: Path of the image to read.
        output_file: Path the output PNG is written to.
    """
    print("Reading input image from file {}".format(input_file))
    with Image.open(input_file) as img:
        input_image = preprocess(img)
        image_width = img.width
        image_height = img.height

    with engine.create_execution_context() as context:
        # Set the concrete input shape from the image size before querying
        # the shapes of the other bindings.
        context.set_binding_shape(engine.get_binding_index("input"), (1, 3, image_height, image_width))
        # Allocate host and device buffers. The loop keeps only the last
        # input and last output it sees, so it assumes one of each.
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                # moveaxis in preprocess returns a view; make it contiguous
                # before copying it to the device.
                input_buffer = np.ascontiguousarray(input_image)
                input_memory = cuda.mem_alloc(input_buffer.nbytes)
                bindings.append(int(input_memory))
            else:
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Copy the input to the device, run inference, copy the output back.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Wait for all queued work on the stream before reading the output.
        stream.synchronize()

    with postprocess(np.reshape(output_buffer, (image_height, image_width))) as img:
        print("Writing output image to file {}".format(output_file))
        img.convert('RGB').save(output_file, "PNG")

# Script entry point: load the serialized engine and run a single inference.
if __name__ == "__main__":
    print("Running TensorRT inference for FCN-ResNet101")
    with load_engine(engine_file) as engine:
        infer(engine, input_file, output_file)

export.py
from PIL import Image
from io import BytesIO
import requests

# Path the downloaded sample image is saved to.
output_image = "input.ppm"

print("Exporting ppm image {}".format(output_image))
response = requests.get("https://pytorch.org/assets/images/deeplab1.png")
with Image.open(BytesIO(response.content)) as img:
    # Paste onto a white RGB canvas, using the image's fourth band as the
    # mask (assumes the download has an alpha channel).
    ppm = Image.new("RGB", img.size, (255, 255, 255))
    ppm.paste(img, mask=img.split()[3])
    ppm.save(output_image)

import torch
import torch.nn as nn

# Path the exported ONNX model is written to.
output_onnx = "fcn-resnet101.onnx"

class FCN_ResNet101(nn.Module):
    """Wrap the torch.hub ``fcn_resnet101`` model and emit class indices."""

    def __init__(self):
        super(FCN_ResNet101, self).__init__()
        # Loads the model (with pretrained weights) through torch.hub.
        self.model = torch.hub.load('pytorch/vision:v0.6.0', 'fcn_resnet101', pretrained=True)

    def forward(self, inputs):
        """Return the argmax over dim 1 of the model's ``'out'`` tensor.

        The reduced dimension is kept with size 1.
        """
        x = self.model(inputs)['out']
        x = x.argmax(1, keepdim=True)
        return x

model = FCN_ResNet101()
model.eval()

# Random example input used for the export.
input_tensor = torch.rand(4, 3, 224, 224)

print("Exporting ONNX model {}".format(output_onnx))
# Batch, height and width are declared dynamic on both input and output.
torch.onnx.export(model, input_tensor, output_onnx,
                  opset_version=12,
                  do_constant_folding=True,
                  input_names=["input"],
                  output_names=["output"],
                  dynamic_axes={"input": {0: "batch", 2: "height", 3: "width"},
                                "output": {0: "batch", 2: "height", 3: "width"}},
                  verbose=False)

Command used to build the engine file:

trtexec --onnx=fcn-resnet101.onnx --fp16 --workspace=64 --minShapes=input:1x3x256x256 --optShapes=input:1x3x1026x1282 --maxShapes=input:1x3x1440x2560 --buildOnly --saveEngine=fcn-resnet101.engine

Please attach or include links to any models, data, files, or scripts necessary to reproduce your issue. (Github repo, Google Drive, Dropbox, etc.)