Description
When executing the code, TensorRT repeatedly reports Error Code 1: Cuda Runtime (invalid argument) from the default allocator after the result has already been obtained, and the process then exits with a segmentation fault.

Here is the log:
Writing output image to file output.png
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[05/02/2024-12:53:53] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
Segmentation fault (core dumped)
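My guess (I have not verified this): the deallocate errors fire at interpreter exit, after pycuda.autoinit has already torn down the CUDA context, so TensorRT's device frees fail and the process segfaults. Below is a minimal sketch of the explicit teardown ordering I mean; the body of run() is just a placeholder for the inference steps in inference.py further down.

import pycuda.autoinit  # creates the CUDA context and pops it at interpreter exit
import tensorrt as trt

def run():
    logger = trt.Logger()
    runtime = trt.Runtime(logger)
    with open("fcn-resnet101.engine", "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
    # ... same buffer setup, execution, and copies as in inference.py ...
    # Release TensorRT objects in reverse order of creation, while the
    # pycuda context is still current.
    del context
    del engine
    del runtime

run()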
Environment
I ran everything inside Docker.
Container image: nvcr.io/nvidia/tensorrt:22.12-py3
Relevant Files
inference.py
import numpy as np
import os
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

import matplotlib.pyplot as plt
from PIL import Image

TRT_LOGGER = trt.Logger()

engine_file = "fcn-resnet101.engine"
input_file = "input.ppm"
output_file = "output.png"

def preprocess(image):
    # Normalize with the ImageNet mean/stddev, then HWC -> CHW.
    image = image.convert('RGB')
    mean = np.array([0.485, 0.456, 0.406]).astype('float32')
    stddev = np.array([0.229, 0.224, 0.225]).astype('float32')
    data = (np.asarray(image).astype('float32') / float(255.0) - mean) / stddev
    return np.moveaxis(data, 2, 0)

def postprocess(data):
    # Map per-pixel class indices to a color palette.
    num_classes = 21
    palette = np.array([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    colors = np.array([palette * i % 255 for i in range(num_classes)]).astype("uint8")
    img = Image.fromarray(data.astype('uint8'), mode='P')
    img.putpalette(colors)
    return img

def load_engine(engine_file_path):
    assert os.path.exists(engine_file_path)
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def infer(engine, input_file, output_file):
    print("Reading input image from file {}".format(input_file))
    with Image.open(input_file) as img:
        input_image = preprocess(img)
        image_width = img.width
        image_height = img.height

    with engine.create_execution_context() as context:
        # Set the input shape based on the image dimensions.
        context.set_binding_shape(engine.get_binding_index("input"), (1, 3, image_height, image_width))
        # Allocate host and device buffers.
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                input_buffer = np.ascontiguousarray(input_image)
                input_memory = cuda.mem_alloc(input_image.nbytes)
                bindings.append(int(input_memory))
            else:
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer the prediction back from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream.
        stream.synchronize()

    with postprocess(np.reshape(output_buffer, (image_height, image_width))) as img:
        print("Writing output image to file {}".format(output_file))
        img.convert('RGB').save(output_file, "PNG")

if __name__ == "__main__":
    print("Running TensorRT inference for FCN-ResNet101")
    with load_engine(engine_file) as engine:
        infer(engine, input_file, output_file)
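Possibly relevant: inference.py uses the binding-index API (get_binding_index, set_binding_shape, execute_async_v2), which TensorRT 8.5 (the version in the 22.12 container) deprecates in favor of the name-based tensor API. Below is an untested sketch of the equivalent loop with that API, reusing engine, input_buffer, image_height, and image_width from infer() above; I have not checked whether it changes the crash.

# Untested sketch: name-based I/O tensor API (TensorRT >= 8.5).
with engine.create_execution_context() as context:
    context.set_input_shape("input", (1, 3, image_height, image_width))
    device_buffers = {}
    for i in range(engine.num_io_tensors):
        name = engine.get_tensor_name(i)
        if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
            device_buffers[name] = cuda.mem_alloc(input_buffer.nbytes)
        else:
            shape = context.get_tensor_shape(name)
            dtype = trt.nptype(engine.get_tensor_dtype(name))
            output_buffer = cuda.pagelocked_empty(trt.volume(shape), dtype)
            device_buffers[name] = cuda.mem_alloc(output_buffer.nbytes)
        context.set_tensor_address(name, int(device_buffers[name]))
    stream = cuda.Stream()
    cuda.memcpy_htod_async(device_buffers["input"], input_buffer, stream)
    context.execute_async_v3(stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(output_buffer, device_buffers["output"], stream)
    stream.synchronize()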
export.py
from PIL import Image
from io import BytesIO
import requests

output_image = "input.ppm"
print("Exporting ppm image {}".format(output_image))
response = requests.get("https://pytorch.org/assets/images/deeplab1.png")
with Image.open(BytesIO(response.content)) as img:
    # Flatten the RGBA source onto a white background before saving as PPM.
    ppm = Image.new("RGB", img.size, (255, 255, 255))
    ppm.paste(img, mask=img.split()[3])
    ppm.save(output_image)

import torch
import torch.nn as nn

output_onnx = "fcn-resnet101.onnx"

class FCN_ResNet101(nn.Module):
    def __init__(self):
        super(FCN_ResNet101, self).__init__()
        self.model = torch.hub.load('pytorch/vision:v0.6.0', 'fcn_resnet101', pretrained=True)

    def forward(self, inputs):
        x = self.model(inputs)['out']
        # Collapse the class scores to a per-pixel class index.
        x = x.argmax(1, keepdims=True)
        return x

model = FCN_ResNet101()
model.eval()

input_tensor = torch.rand(4, 3, 224, 224)
print("Exporting ONNX model {}".format(output_onnx))
torch.onnx.export(model, input_tensor, output_onnx,
    opset_version=12,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch", 2: "height", 3: "width"},
                  "output": {0: "batch", 2: "height", 3: "width"}},
    verbose=False)
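Before building the engine, the exported file can be sanity-checked with the snippet below (this assumes the onnx Python package is available; pip install onnx otherwise):

# Optional structural check of the exported model.
import onnx

onnx_model = onnx.load("fcn-resnet101.onnx")
onnx.checker.check_model(onnx_model)  # raises if the graph is malformed
print(onnx_model.graph.input[0])      # inspect the dynamic batch/height/width dims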
Command used to build the engine file:
trtexec --onnx=fcn-resnet101.onnx --fp16 --workspace=64 --minShapes=input:1x3x256x256 --optShapes=input:1x3x1026x1282 --maxShapes=input:1x3x1440x2560 --buildOnly --saveEngine=fcn-resnet101.engine
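To isolate the Python side, the saved engine can also be exercised with trtexec alone, which runs random-input inference at a given shape; this may show whether the crash is specific to the pycuda path:

trtexec --loadEngine=fcn-resnet101.engine --shapes=input:1x3x1026x1282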