# CUDA runtime error we are facing with the TensorRT inference script below.

import os
import time
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
import glob
import datetime
import shutil

# Input shape that the model expects

input_shape = (3, 224, 224) # (C, H, W) expected by the engine — update if the model differs

class HostDeviceMem(object):
    """Pairs a pagelocked host buffer with its matching device allocation."""

    def __init__(self, host_mem, device_mem):
        # BUG FIX: the pasted code defined `init`, not `__init__`, so
        # `host`/`device` were never set and every later `inp.host` /
        # `inp.device` access would raise AttributeError.
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return f"Host:\n{str(self.host)}\nDevice:\n{str(self.device)}"

    def __repr__(self):
        return self.__str__()

def load_engine(trt_runtime, engine_path):
    """Read a serialized TensorRT engine file and deserialize it.

    trt_runtime: a trt.Runtime instance.
    engine_path: path to the .engine file on disk.
    Returns the deserialized ICudaEngine (or None if deserialization fails).
    """
    # BUG FIX: the pasted code used curly quotes (“rb”), a SyntaxError.
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    return trt_runtime.deserialize_cuda_engine(engine_data)

def allocate_buffers(engine, batch_size=1):
    """Allocate pagelocked host and device buffers for every engine binding.

    Returns (inputs, outputs, bindings, stream): inputs/outputs are lists of
    HostDeviceMem, bindings is the list of raw device pointers in binding
    order (as execute_async expects), stream is a fresh CUDA stream.
    """
    # BUG FIX: the pasted code had `inputs =` / `outputs =` / `bindings =`
    # with no list literal — a SyntaxError.
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding in engine:
        # Total element count for this binding, scaled by batch size.
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Pagelocked host memory enables async H2D/D2H copies.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one async inference pass: copy inputs up, execute, copy outputs back.

    Returns the list of host-side output arrays (the pagelocked buffers
    themselves — they are reused on the next call, so copy if you keep them).
    """
    # Idiom fix: plain for-loops instead of list comprehensions used only
    # for their side effects.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # NOTE(review): execute_async is the implicit-batch API; if this engine
    # was built with an explicit batch dimension, use
    # context.execute_async_v2(bindings=..., stream_handle=...) instead —
    # confirm against how the engine was built.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Block until all queued copies and the kernel have finished.
    stream.synchronize()
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    """Deserialize the TensorRT engine and allocate its I/O buffers.

    Returns (inputs, outputs, bindings, stream, context).
    """
    # BUG FIX: the original set os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    # here — AFTER `import pycuda.autoinit` already created a CUDA context
    # at import time. That is ineffective at best, and at worst leaves the
    # pycuda context and TensorRT allocations on different devices, which
    # produces exactly the reported
    #   "Error Code 1: Cuda Runtime (invalid argument)" in
    #   defaultAllocator.cpp::deallocate followed by a segfault.
    # To pin a GPU, set CUDA_VISIBLE_DEVICES in the shell (or at the very
    # top of the file, before any pycuda/tensorrt import).
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    context = trt_engine.create_execution_context()
    # NOTE(review): for an explicit-batch engine the binding shape must
    # include the batch dim, i.e. (1, 3, 224, 224) — confirm against the
    # engine's binding shape; input_shape here is (3, 224, 224).
    context.set_binding_shape(0, input_shape)
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context

def preprocess_image(image, target_shape):
    """Resize, normalize to [0, 1], and flatten a PIL image to CHW order.

    image: PIL.Image instance.
    target_shape: (C, H, W) tuple; the image is resized to (W, H).
    Returns a flat float32 numpy array of length C*H*W.
    """
    # Robustness: grayscale or RGBA inputs would break the [2, 0, 1]
    # transpose below, so force 3-channel RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    # BUG FIX: Image.ANTIALIAS was deprecated and removed in Pillow 10;
    # Image.LANCZOS is the same filter under its current name.
    image = image.resize((target_shape[2], target_shape[1]), Image.LANCZOS)
    image = np.asarray(image).transpose([2, 0, 1]).astype(np.float32) / 255.0  # normalize
    return image.ravel()

# --- Driver script: classify every .jpg and sort confident hits by make ---
# (All curly quotes from the forum paste replaced with ASCII quotes — the
# pasted version was a SyntaxError.)
trt_engine_path = "./models/resnet18_vehiclemakenet_pruned.etlt_fp16_b1.engine"
inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)

image_folder_path = "./Input_till_13_aug/"
# BUG FIX: the paste garbled ".." (parent directory) into a "…" character.
output_folder_path = f"{image_folder_path}/../Output_till_13_aug/"

os.makedirs(output_folder_path, exist_ok=True)

label_cls = ["Acura", "Audi", "BMW", "Chevrolet", "Chrysler", "Dodge", "Ford", "GMC", "Honda", "Hyundai", "Infiniti", "Jeep", "Kia", "Lexus", "Mazda", "Mercedes", "Nissan", "Subaru", "Toyota", "Volkswagen"]

for image_path in glob.glob(image_folder_path + "*.jpg"):
    print("image_path : ", image_path)
    image_name = os.path.basename(image_path)
    image = Image.open(image_path)
    image = preprocess_image(image, input_shape)
    # Fill the (single) input binding's pagelocked host buffer in place.
    np.copyto(inputs[0].host, image)

    output = do_inference(context, bindings, inputs, outputs, stream)
    vehicle_make_result = np.argmax(output[0], axis=0)
    # NOTE(review): treating output[0] values as probabilities assumes the
    # engine ends in a softmax — confirm against the model.
    vehicle_make_acc = int(output[0][vehicle_make_result] * 100)
    class_name = label_cls[vehicle_make_result]

    print(f"Vehicle result: {class_name}")
    print(f"Vehicle accuracy: {vehicle_make_acc}%")

    # Only keep confidently-classified images, sorted into per-make folders.
    if vehicle_make_acc > 70:
        output_cls_dir = os.path.join(output_folder_path, class_name)
        os.makedirs(output_cls_dir, exist_ok=True)
        shutil.copy(image_path, os.path.join(output_cls_dir, image_name))

# While running the above code we are facing a problem:
#   [08/14/2024-11:58:45] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42]
#   Error Code 1: Cuda Runtime (invalid argument)
#   Segmentation fault (core dumped)
# Please provide a solution for the same.