import os
import time
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image
import glob
import datetime
import shutil
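# NOTE: pycuda.autoinit creates and binds the CUDA context as soon as it is
# imported, so any CUDA_VISIBLE_DEVICES selection has to happen before that
# import (see the sketch at the end of this post).
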
# Input shape that the model expects
input_shape = (3, 224, 224)  # update to the correct shape for your engine
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return f"Host:\n{str(self.host)}\nDevice:\n{str(self.device)}"

    def __repr__(self):
        return self.__str__()

def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    return trt_runtime.deserialize_cuda_engine(engine_data)

def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Page-locked host buffer plus a device buffer of the same size
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Copy inputs to the device, run inference, copy outputs back, then sync.
    # NOTE: execute_async is the implicit-batch API; if the engine was built
    # with an explicit batch dimension, use
    # execute_async_v2(bindings=bindings, stream_handle=stream.handle) instead.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]

def model_loading(trt_engine_path):
    # NOTE: setting CUDA_VISIBLE_DEVICES here has no effect, because
    # pycuda.autoinit already created the CUDA context at import time.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    context = trt_engine.create_execution_context()
    # set_binding_shape only applies to explicit-batch engines, and the shape
    # must then include the batch dimension, e.g. (1, 3, 224, 224).
    context.set_binding_shape(0, input_shape)
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context

def preprocess_image(image, target_shape):
    # Force 3 channels; a grayscale or RGBA input would not match the buffer size
    image = image.convert("RGB")
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    image = image.resize((target_shape[2], target_shape[1]), Image.LANCZOS)
    image = np.asarray(image).transpose([2, 0, 1]).astype(np.float32) / 255.0  # HWC -> CHW, normalize
    return image.ravel()

trt_engine_path = "./models/resnet18_vehiclemakenet_pruned.etlt_fp16_b1.engine"
inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)
image_folder_path = "./Input_till_13_aug/"
output_folder_path = f"{image_folder_path}/../Output_till_13_aug/"
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)
label_cls = ["Acura", "Audi", "BMW", "Chevrolet", "Chrysler", "Dodge", "Ford", "GMC", "Honda", "Hyundai", "Infiniti", "Jeep", "Kia", "Lexus", "Mazda", "Mercedes", "Nissan", "Subaru", "Toyota", "Volkswagen"]
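# NOTE: this list is assumed to match the label order the model was trained
# and exported with; a mismatch would silently mislabel every result.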
for image_path in glob.glob(image_folder_path + "*.jpg"):
    print("image_path : ", image_path)
    image_name = os.path.basename(image_path)
    image = Image.open(image_path)
    image = preprocess_image(image, input_shape)
    np.copyto(inputs[0].host, image)
    output = do_inference(context, bindings, inputs, outputs, stream)
    vehicle_make_result = np.argmax(output[0], axis=0)
    # Treat the raw score as a percentage; this assumes the network ends in a softmax
    vehicle_make_acc = int(output[0][vehicle_make_result] * 100)
    class_name = label_cls[vehicle_make_result]
    print(f"Vehicle result: {class_name}")
    print(f"Vehicle accuracy: {vehicle_make_acc}%")
    # Copy confidently classified images into a per-class output folder
    if vehicle_make_acc > 70:
        output_cls_dir = os.path.join(output_folder_path, class_name)
        if not os.path.exists(output_cls_dir):
            os.makedirs(output_cls_dir)
        shutil.copy(image_path, os.path.join(output_cls_dir, image_name))
While running the above code we are facing the following problem:

[08/14/2024-11:58:45] [TRT] [E] 1: [defaultAllocator.cpp::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
Segmentation fault (core dumped)

Please provide a solution for this.
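In case it is relevant: our understanding is that pycuda.autoinit creates the CUDA context at import time, so selecting a GPU afterwards via CUDA_VISIBLE_DEVICES does nothing. A minimal sketch of the import ordering we believe is required (assuming GPU 1 is the intended device):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # must be set before pycuda.autoinit

import pycuda.autoinit  # creates the CUDA context on the now-visible device
import pycuda.driver as cuda
import tensorrt as trt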