Hi. I’m trying to inference pruned peoplenet model using tensor rt, but always get zero coverage output. So, i downloaded peoplenet tlt using command from this topic: How to run tlt-converter
After that i converted .eltl to engine using next command:
./tlt-converter /home/bronstein/tlt-experiments/resnet34_peoplenet_pruned.etlt -k tlt_encode -o output_cov/Sigmoid,output_bbox/BiasAdd -d 3,544,960 -i nchw -e /home/bronstein/tlt-experiments/engine/peoplenet.engine -m 1 -t fp16
Then i tried to inference this model using image from peoplenet main page:
Here is code which i used:
import numpy as np
import cv2
import time
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
runtime = trt.Runtime(TRT_LOGGER)
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []
def Inference(engine):
im = cv2.imread('input_11ft45deg_000070.jpg')
# im = cv2.resize(im, (640, 640))
im = cv2.resize(im, (960, 544))
im = np.asarray(im).astype(np.float32)
im = im.transpose(2,0,1) / 255
print(np.shape(host_inputs[0]), np.shape(im))
np.copyto(host_inputs[0], im.ravel())
stream = cuda.Stream()
context = engine.create_execution_context()
start_time = time.time()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
stream.synchronize()
print("execute times "+str(time.time()-start_time))
print(host_outputs[1], np.max(host_outputs[1]))
def PrepareEngine():
# deserialize engine
with open('peoplenet.engine', 'rb') as f:
buf = f.read()
engine = runtime.deserialize_cuda_engine(buf)
# create buffer
for binding in engine:
size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
host_mem = cuda.pagelocked_empty(shape=[size],dtype=np.float32)
cuda_mem = cuda.mem_alloc(host_mem.nbytes)
bindings.append(int(cuda_mem))
if engine.binding_is_input(binding):
print(engine.get_binding_shape(binding))
host_inputs.append(host_mem)
cuda_inputs.append(cuda_mem)
else:
print(engine.get_binding_shape(binding))
host_outputs.append(host_mem)
cuda_outputs.append(cuda_mem)
return engine
if __name__ == "__main__":
engine = PrepareEngine()
Inference(engine)
And np.max(host_outputs[1]) gives me 0.0. What am i doing wrong?