I converted the ONNX model to a .engine file with the following command:
./trtexec --onnx=/opt/nvidia/deepstream/deepstream-6.2/sources/yolov7/YOLOv7_Chien/pretrained_models/yolov7.onnx --explicitBatch --saveEngine=/opt/nvidia/deepstream/deepstream-6.2/sources/yolov7/YOLOv7_Chien/pretrained_models/yolov7_fp32.engine --workspace=4096
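As a sanity check, the saved engine can also be loaded and timed on its own with trtexec (--loadEngine is a standard trtexec flag; shown here only to isolate whether the problem is in the engine itself or in my Python code):

./trtexec --loadEngine=/opt/nvidia/deepstream/deepstream-6.2/sources/yolov7/YOLOv7_Chien/pretrained_models/yolov7_fp32.engine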
Then I evaluated the .engine model; part of the code is as follows:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # creates a CUDA context

logger = trt.Logger(trt.Logger.WARNING)
logger.min_severity = trt.Logger.Severity.ERROR
runtime = trt.Runtime(logger)
trt.init_libnvinfer_plugins(logger, '')  # initialize TensorRT plugins
with open(engine_path, "rb") as f:
    serialized_engine = f.read()
engine = runtime.deserialize_cuda_engine(serialized_engine)
self.imgsz = engine.get_binding_shape(0)[2:]  # get the real input shape of the model, in case the user enters it wrong
self.context = engine.create_execution_context()
self.inputs, self.outputs, self.bindings = [], [], []
self.stream = cuda.Stream()
for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding))  # CHANGE
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    print("ab", type(size), dtype)  # debug output
    host_mem = cuda.pagelocked_empty(size, dtype)  # <-- this is where it fails
    device_mem = cuda.mem_alloc(host_mem.nbytes)
    self.bindings.append(int(device_mem))
    if engine.binding_is_input(binding):
        self.inputs.append({'host': host_mem, 'device': device_mem})
    else:
        self.outputs.append({'host': host_mem, 'device': device_mem})
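For completeness, the inference call itself (not shown above) roughly follows the usual pycuda/TensorRT pattern; this is a sketch rather than my exact code, and `preprocessed` is a placeholder name for the preprocessed input array:

import numpy as np
import pycuda.driver as cuda

def infer(self, preprocessed):
    # Fill the pinned input buffer, copy it to the device, run the
    # engine on the stream, then copy the outputs back to the host.
    np.copyto(self.inputs[0]['host'], preprocessed.ravel())
    for inp in self.inputs:
        cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
    self.context.execute_async_v2(bindings=self.bindings,
                                  stream_handle=self.stream.handle)
    for out in self.outputs:
        cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
    self.stream.synchronize()
    return [out['host'] for out in self.outputs]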
But I got this error:
Namespace(engine='../YOLOv7_Chien/pretrained_models/yolov7_fp32.engine', folder='val2017/')
Traceback (most recent call last):
  File "infer_trt_yolov7.py", line 44, in <module>
    pred = Predictor(engine_path=args.engine)
  File "infer_trt_yolov7.py", line 14, in __init__
    super(Predictor, self).__init__(engine_path)
  File "/opt/nvidia/deepstream/deepstream-6.2/sources/yolov7/YOLOv7_TensorRT_Evaluation/utils/utils.py", line 40, in __init__
    host_mem = cuda.pagelocked_empty(size, dtype)
pycuda._driver.MemoryError: cuMemHostAlloc failed: out of memory
My GPU has 11 GB of RAM, which I think should be sufficient to evaluate the .engine model. Please help me.
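In case it is relevant, one thing I can check is whether any binding of the engine has a dynamic dimension, since trt.volume() of a shape containing -1 is negative and is not a usable allocation size for cuda.pagelocked_empty(). A minimal standalone check (the path is just an example):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(logger, '')
with open("yolov7_fp32.engine", "rb") as f:  # example path
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
# A -1 in any printed shape marks a dynamic dimension.
for i in range(engine.num_bindings):
    shape = engine.get_binding_shape(i)
    print(engine.get_binding_name(i), shape, trt.volume(shape))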