Description
When running a CRAFT model through a TensorRT engine, the output buffers come back with shapes [(3145728,), (196608,)] instead of the expected [(1, 32, 256, 384), (1, 256, 384, 2)].
Environment
TensorRT Version: 8.5.3.1
GPU Type: RTX 3060
Nvidia Driver Version: 520.61.05
CUDA Version: 11.8
CUDNN Version: 10.1
Operating System + Version: Ubuntu 20.04
Python Version (if applicable): 3.8.16
Hi, in the code below I am trying to run a CRAFT model that I converted to a TensorRT engine, but the output I get is [(3145728,), (196608,)] instead of [(1, 32, 256, 384), (1, 256, 384, 2)]. As you can see, all the dimensions are being collapsed into a single one. This is the code I am using:
import cv2
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # creates and manages the CUDA context
import imgproc  # helper module from the CRAFT repository

class Extractor(object):
    def __init__(self, model_path):
        with open(model_path, "rb") as fp:
            self.plan = fp.read()
        self.logger = trt.Logger()
        runtime = trt.Runtime(self.logger)
        self.engine = runtime.deserialize_cuda_engine(self.plan)
        self.context = self.engine.create_execution_context()
class HostDeviceMem(object):
    """Simple helper data class that's a little nicer to use than a 2-tuple."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()
ext = Extractor("plan/craft_mlt_25k.plan")
img = cv2.imread('./images/test6.jpg')
img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(img, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
ratio_h = ratio_w = 1 / target_ratio
x = imgproc.normalizeMeanVariance(img_resized)
# x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
# x = x.unsqueeze(0)                        # [c, h, w] to [b, c, h, w]
x = np.copy(x.transpose(2, 0, 1), order="C")  # [h, w, c] to [c, h, w], contiguous copy
x = np.expand_dims(x, axis=0)                 # add batch dimension
x = np.float32(x)
stream = cuda.Stream()
bindings = []
inputs = []
outputs = []
for binding in ext.engine:
    binding_dims = ext.engine.get_tensor_shape(binding)
    size = trt.volume(binding_dims)
    dtyp = trt.nptype(ext.engine.get_tensor_dtype(binding))
    host_mem = cuda.pagelocked_empty(size, dtyp)
    device_mem = cuda.mem_alloc(host_mem.nbytes)
    bindings.append(int(device_mem))
    if ext.engine.binding_is_input(binding):
        inputs.append(HostDeviceMem(host_mem, device_mem))
    else:
        outputs.append(HostDeviceMem(host_mem, device_mem))
np.copyto(inputs[0].host, x.ravel())  # copy the preprocessed image into the input host buffer
[cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
ext.context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
[cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
stream.synchronize()
out = [out.host for out in outputs]
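For reference, my assumption was that each flat host buffer could simply be reshaped back to the binding shape the engine reports, roughly like the sketch below (this is not part of my script; the calls just mirror the allocation loop above):

# Sketch only: reshape each flat output buffer to the shape the engine reports.
# Assumes static shapes and that the outputs come back in binding order.
output_shapes = [tuple(ext.engine.get_tensor_shape(b))
                 for b in ext.engine
                 if not ext.engine.binding_is_input(b)]
results = [o.host.reshape(shape) for o, shape in zip(outputs, output_shapes)]
print([r.shape for r in results])  # expecting [(1, 32, 256, 384), (1, 256, 384, 2)]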
Please let me know what I am doing wrong here.