Dear @Morganh,
I have customized an inference script for the MobileNet_V1 classification model trained with TAO.
I converted the .etlt file to a TensorRT engine using tao-converter with batch size 1, and it works fine.
Below is the working code for batch size 1.
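For reference, the conversion command was along these lines (the key, paths, and output node here are placeholders from memory; -m sets the maximum batch size the engine supports):

tao-converter -k <encryption_key> \
    -d 3,354,354 \
    -o predictions/Softmax \
    -m 1 \
    -t fp16 \
    -e <engine_output_path> \
    <model>.etlt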
import os

# Select the GPU before pycuda.autoinit creates the CUDA context;
# setting CUDA_VISIBLE_DEVICES any later has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import time
import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
from PIL import Image, ImageDraw
import codecs
import glob
import datetime
import shutil
import matplotlib.pyplot as plt
# input_shape = (3,236,236)
input_shape = (3,354,354)
fallen_label = ["Fallen","Normal"]
# Input Params
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()
def load_engine(trt_runtime, engine_path):
    with open(engine_path, "rb") as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine
# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
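# Note on batching: `size` above is already multiplied by batch_size, so the
# buffers can hold a full batch, but only if the engine itself was built with
# a max batch size >= batch_size (tao-converter -m) and this function is
# called with that batch_size.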
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
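# Note: execute_async(batch_size=...) is TensorRT's implicit-batch API. If the
# engine were built from an explicit-batch network, execute_async_v2 (which
# takes no batch_size argument) would be needed instead.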
def model_loading(trt_engine_path):
    # TensorRT logger singleton
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    trt_runtime = trt.Runtime(TRT_LOGGER)
    trt_engine = load_engine(trt_runtime, trt_engine_path)
    # An execution context is needed for inference.
    context = trt_engine.create_execution_context()
    # Input shape
    context.set_binding_shape(0, input_shape)
    # This allocates memory for network inputs/outputs on both CPU and GPU.
    inputs, outputs, bindings, stream = allocate_buffers(trt_engine)
    return inputs, outputs, bindings, stream, context
def preprocess_res18(image):
    # Resize to the network input size, HWC -> CHW, cast to float32, and flatten.
    image = (
        np.asarray(image.resize((input_shape[1], input_shape[2]), Image.ANTIALIAS))
        .transpose([2, 0, 1])
        .astype(trt.nptype(trt.float32))
        .ravel()
    )
    return image
trt_engine_path = "./Models/V2.2/MobileNet_V1/MobileNetV1_ReTrain_Ep70_ZeroConfAug2_SIZE_416X416_FallenObjectClassification_V2.2_fp16_b1.engine"
camera_frames_path = "./TestSampleFrame/73_43/"
output_folder_path = "./output/73_430_output/"
acc_threshold = 95
cropped_images = "./output/Accuracy_wise_analysis_73_430_output/"
if not os.path.exists(output_folder_path):
    os.mkdir(output_folder_path)
if not os.path.exists(cropped_images):
    os.mkdir(cropped_images)
inputs, outputs, bindings, stream, context = model_loading(trt_engine_path)
cropped_img_count = 1
image_count = 0
# Perform classification on each cropped area and draw rectangles.
def classify_and_draw(image, fallen_area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold):
    try:
        draw = ImageDraw.Draw(image)
        global cropped_img_count
        global image_count
        box_coordinates = []  # List to store box coordinates.
        for area_coordinates in fallen_area:
            x1, y1, x2, y2 = area_coordinates['XMIN'], area_coordinates['YMIN'], area_coordinates['XMAX'], area_coordinates['YMAX']
            if (x2 - x1) > 60 and (y2 - y1) > 60:
                area_image_cropped = image.crop((x1, y1, x2, y2))
                # Preprocess the cropped area.
                area_image = preprocess_res18(area_image_cropped)
                # Copy the preprocessed image to the input buffer.
                np.copyto(inputs[0].host, area_image)
                # Perform inference.
                output = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
                max_index_row = np.argmax(output[0], axis=0)
                fallen_normal_acc = int('{:.0f}'.format(output[0][max_index_row] * 100))
                fallen_label_info = fallen_label[max_index_row]
                color = "green"
                if fallen_label_info == "Fallen":
                    color = "red"
                if fallen_normal_acc > 20:
                    image_name = f"{image_count}.jpg"
                    crop_img_name = cropped_images + "/" + str(fallen_normal_acc) + "_" + image_name + "_" + str(cropped_img_count) + ".jpg"
                    cropped_img_count += 1
                    area_image_cropped.save(crop_img_name)
                box_coordinates.append((x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc))
        # Draw all boxes after the loop.
        for box_info in box_coordinates:
            x1, y1, x2, y2, color, fallen_label_info, fallen_normal_acc = box_info
            draw.rectangle([x1, y1, x2, y2], outline=color)
            if fallen_normal_acc > acc_threshold:
                draw.text((x1, y1), f"{fallen_label_info} ({fallen_normal_acc}%)", fill=color)
        image.save(f"{output_folder_path}/{image_count}.jpg")
        image_count += 1
        print(f"processed images are: {image_count}")
    except Exception as e:
        print("exception as:", e)
# Wrong violation analysis
# for camera_dir in glob.glob(camera_frames_path + "/*"):
#     camera_name = camera_dir.split("/")[-1]
#     fallen_area_data = fallen_area[100 + int(camera_name_mapping[camera_name])]
#     for image_path in glob.glob(camera_dir + "/*"):
#         classify_and_draw(image_path, fallen_area_data, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
camera_image_mapping = {
    131: 31,
    132: 32,
    133: 33,
    134: 34,
    135: 35,
    136: 36,
    137: 37,
    138: 38,
    139: 39,
    140: 40,
    141: 41,
    142: 42,
    143: 43,
}
camera_name_mapping = {
    '59_330': 31,
    '60_570': 32,
    '61_070': 33,
    '62_210': 34,
    '65_220': 35,
    '66_550': 36,
    '73_430': 37,
    '74_820': 38,
    '75_420': 39,
    '76_590': 40,
    '83_860': 41,
    '85_650': 43,
}
# next 59_330 : 31
area = fallen_area[137]  # fallen_area is loaded elsewhere (not shown here)
video_path = "./TestSampleFrame/fallen_22March_16_1630/22march_4_430/73+430_DT_2024-03-22_16:00:01.550223_DT_003d.mkv"
frame_interval = 10
frame_count = 0
# Initialize OpenCV video capture
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Unable to open video.")
    exit()
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break  # Stop when there are no more frames.
    try:
        frame_count += 1
        print(frame_count)
        frame = cv2.resize(frame, (1920, 1080))
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image_rgb)
        if frame_count % frame_interval == 0:
            classify_and_draw(image_pil, area, context, bindings, inputs, outputs, stream, fallen_label, acc_threshold)
        else:
            image_pil.save(f"{output_folder_path}/{image_count}.jpg")
    except Exception as e:
        print("---", e)
    # Press 'q' to exit the loop.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
print(cap.isOpened())
Can you please suggest how I can modify this for batch size n (n > 1)?
Also, how should I assemble the batch input before passing it to the model?
I have tried, but I am getting size mismatch errors when feeding batch input.
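My (simplified) attempt looks roughly like this; batch_images, trt_engine, and n are placeholders, and the engine was rebuilt with -m >= n:

# Simplified sketch of my batching attempt (batch_images is a list of n PIL crops):
n = 4  # must not exceed the max batch size the engine was built with
inputs, outputs, bindings, stream = allocate_buffers(trt_engine, batch_size=n)
batch = np.stack([preprocess_res18(img) for img in batch_images])  # shape: (n, 3*354*354)
np.copyto(inputs[0].host, batch.ravel())  # fill the whole batch buffer
output = do_inference(context, bindings=bindings, inputs=inputs,
                      outputs=outputs, stream=stream, batch_size=n)
# output[0] should then hold n concatenated probability vectors,
# but this is where the size mismatch occurs.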
Thanks.