I referred to the above repo and fed it already preprocessed input. My problem is not in the input data, it is in the model.
Binding name: input
Binding size: (-1, 3, 300, 18, 1)
Binding name: fc_pred
Binding size: (-1, 3)
Because of the -1 in the binding size, I am getting the error below when calling context.execute_async():
[12/23/2022-23:29:28] [TRT] [E] 3: [executionContext.cpp::resolveSlots::1482] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::resolveSlots::1482, condition: allInputDimensionsSpecified(routine)
)
[12/23/2022-23:29:28] [TRT] [E] 2: [executionContext.cpp::enqueueInternal::368] Error Code 2: Internal Error (Could not resolve slots: )
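From what I can tell, the -1 is a dynamic batch dimension, and the "allInputDimensionsSpecified" check seems to want a concrete input shape set on the execution context before enqueueing, something like the sketch below (the batch size of 1 and the switch to execute_async_v2 are my assumptions, not something from the repo):

    # Assumed sketch: binding 0 is the "input" binding, batch size 1 is a guess.
    context.set_binding_shape(0, (1, 3, 300, 18, 1))
    assert context.all_binding_shapes_specified
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)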
This is my inference script:
import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates the CUDA context
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()
class TLTClfModel(object):
    def __init__(self, engine_path):
        self.trt_engine_path = engine_path
        self.trt_runtime = trt.Runtime(TRT_LOGGER)
        self.trt_engine = self.load_engine(self.trt_runtime, self.trt_engine_path)
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(self.trt_engine)
        self.context = self.trt_engine.create_execution_context()
        # self.context.set_binding_shape(0, trt.tensorrt.Dims([3, 300, 18, 1]))
        # self.context.set_binding_shape(1, trt.tensorrt.Dims([3]))
    @staticmethod
    def load_engine(trt_runtime, engine_path):
        with open(engine_path, "rb") as f:
            engine_data = f.read()
        engine = trt_runtime.deserialize_cuda_engine(engine_data)
        return engine
    def allocate_buffers(self, engine, batch_size=-1):
        """Allocates host and device buffers for TRT engine inference.

        This function is similar to the one in common.py, but converts
        network outputs (which are np.float32) appropriately before
        writing them to the Python buffer. This is needed since TensorRT
        plugins don't support output type description, and in our
        particular case we use the NMS plugin as network output.

        Args:
            engine (trt.ICudaEngine): TensorRT engine

        Returns:
            inputs [HostDeviceMem]: engine input memory
            outputs [HostDeviceMem]: engine output memory
            bindings [int]: buffer to device bindings
            stream (cuda.Stream): cuda stream for engine inference synchronization
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        # sizes = {"input": 16200, "fc_pred": 3}
        for binding in engine:
            name = str(binding)
            # engine.set_binding_shape(sizes[name])
            # The batch dimension is -1 (dynamic), so trt.volume() returns a
            # negative number; multiplying by batch_size=-1 makes it positive
            # (e.g. -16200 * -1 = 16200 for the input binding).
            size = trt.volume(engine.get_binding_shape(binding)) * batch_size
            print("Binding name:", name)
            print("Binding size:", engine.get_binding_shape(binding))
            # Allocate host and device buffers.
            host_mem = cuda.pagelocked_empty(size, np.float32)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream
    def do_inference(self, context, bindings, inputs, outputs, stream, batch_size=1):
        # Transfer input data to the GPU.
        [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
        # Run inference.
        context.execute_async(
            bindings=bindings, stream_handle=stream.handle
        )
        # Transfer predictions back from the GPU.
        [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
        # Synchronize the stream.
        stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]
    def predict(self, inputdata):
        """Runs the engine on a single preprocessed input.

        Args:
            inputdata (np.ndarray): preprocessed input, shaped to match the
                "input" binding (-1, 3, 300, 18, 1)
        """
        inputs = self.inputs
        outputs = self.outputs
        bindings = self.bindings
        stream = self.stream
        inputdata = (inputdata.astype(trt.nptype(trt.float32))).ravel()
        np.copyto(inputs[0].host, inputdata)
        # Fetch the output from the model.
        detection_out = self.do_inference(
            self.context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream
        )
        confid = np.max(detection_out)
        pred = np.argmax(detection_out)
        return pred, confid
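For reference, this is roughly how the class gets used end to end (the engine path and the random input here are placeholders, not my real data):

    if __name__ == "__main__":
        model = TLTClfModel("model.engine")  # placeholder path
        dummy = np.random.rand(1, 3, 300, 18, 1).astype(np.float32)  # matches the "input" binding
        pred, confid = model.predict(dummy)
        print("pred:", pred, "confidence:", confid)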