I am performing INT8 calibration using TensorRT.
Once calibration has completed and I test inference, I get an error at
stream.synchronize()
in the following function.
There is no issue when running the FP32 and FP16 engines; the error occurs only when running the INT8 engine. What could be wrong?
def infer(engine, x, batch_size, context):
    """Run one inference pass on a TensorRT engine and return the host outputs.

    For every binding of ``engine`` a page-locked host buffer and a matching
    device buffer are allocated. ``x`` is converted to float32, its last axis
    is reversed, and the flattened result is copied into the first input
    buffer. The copy to the device, the execution, and the copy back to the
    host are all queued on a single CUDA stream, which is synchronized before
    returning.

    Args:
        engine: TensorRT engine to query for bindings (implicit-batch API).
        x: Array-like input; it is indexed as ``[:, :, ::-1]``, so it must be
            at least 3-dimensional.
        batch_size: Batch size used to size the buffers and passed to
            ``execute_async``.
        context: Execution context created from ``engine``.

    Returns:
        A list with the host array of every output binding, in binding order.

    Raises:
        ValueError: If ``batch_size`` exceeds ``engine.max_batch_size`` or the
            flattened input does not have as many elements as the first
            input binding's host buffer.
        RuntimeError: If TensorRT reports that the work could not be enqueued.
    """
    # An engine cannot run a batch larger than the one it was built for;
    # fail early instead of surfacing an opaque error at synchronize().
    if batch_size > engine.max_batch_size:
        raise ValueError(
            "batch_size %d exceeds engine.max_batch_size %d"
            % (batch_size, engine.max_batch_size)
        )

    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Page-locked host memory is required for asynchronous copies.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # execute_async expects raw device addresses, in binding order.
        bindings.append(int(device_mem))
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_mem, device_mem))

    im = np.array(x, dtype=np.float32, order='C')
    # Reverse the last axis (presumably a BGR <-> RGB channel swap -- confirm).
    im = im[:, :, ::-1]
    # NOTE(review): the data is copied in its current axis order, with no
    # transpose and no scaling. This must match both the layout the engine
    # expects and the preprocessing used to produce the INT8 calibration
    # cache -- confirm against the calibrator.
    flat = im.flatten()
    if flat.size != inputs[0].host.size:
        raise ValueError(
            "input has %d elements but the engine's first input binding "
            "expects %d" % (flat.size, inputs[0].host.size)
        )
    np.copyto(inputs[0].host, flat)

    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    enqueued = context.execute_async(
        batch_size=batch_size, bindings=bindings, stream_handle=stream.handle
    )
    if not enqueued:
        # Drain the already-queued copies before the buffers go out of scope.
        stream.synchronize()
        raise RuntimeError("TensorRT failed to enqueue inference (execute_async)")

    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Block until the copies and the inference have finished.
    stream.synchronize()

    # Return only the host outputs.
    return [out.host for out in outputs]