I followed the steps in the GitHub repository NVIDIA-AI-IOT/trt_pose (real-time pose estimation accelerated with NVIDIA TensorRT) to deploy trt_pose on a Jetson AGX Orin.
When running the notebook tasks/human_pose/live_demo.ipynb:
import json
import trt_pose.models

# Load the human-pose topology shipped with the repo (tasks/human_pose/human_pose.json)
with open('human_pose.json', 'r') as f:
    human_pose = json.load(f)

num_parts = len(human_pose['keypoints'])
num_links = len(human_pose['skeleton'])

model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
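The notebook then loads the trained checkpoint into this model before conversion; for reference, a minimal sketch (the filename is the resnet18_baseline_att checkpoint linked in the repo's README; adjust it if yours differs):

import torch

# Pretrained trt_pose checkpoint from the README's model table
MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
model.load_state_dict(torch.load(MODEL_WEIGHTS))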
warning:
/home/nvidia/.local/lib/python3.8/site-packages/torchvision-0.16.1-py3.8-linux-aarch64.egg/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
/home/nvidia/.local/lib/python3.8/site-packages/torchvision-0.16.1-py3.8-linux-aarch64.egg/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
  warnings.warn(msg)
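As far as I can tell, this warning is unrelated to the failure below: trt_pose still calls torchvision's old pretrained= API internally, which since torchvision 0.13 is only a deprecated alias for the weights enum. The modern equivalent would be:

import torchvision
from torchvision.models import ResNet18_Weights

# torchvision >= 0.13 style: pass a weights enum (or None) instead of pretrained=True
backbone = torchvision.models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)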
When running:

import torch2trt

# data is the dummy input created earlier in the notebook:
#   data = torch.zeros((1, 3, 224, 224)).cuda()
model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)

I get the following error:
[06/18/2024-21:51:41] [TRT] [E] 3: 1.cmap_up.0:0:DECONVOLUTION:GPU:kernel weights has count 2097152 but 4194304 was expected
[06/18/2024-21:51:41] [TRT] [E] 4: 1.cmap_up.0:0:DECONVOLUTION:GPU: count of 2097152 weights in kernel, but kernel dimensions (4,4) with 512 input channels, 512 output channels and 1 groups were specified. Expected Weights count is 512 * 4*4 * 512 / 1 = 4194304
[06/18/2024-21:51:41] [TRT] [E] 3: 1.cmap_up.0:0:DECONVOLUTION:GPU:kernel weights has count 2097152 but 4194304 was expected
…
ValueError Traceback (most recent call last)
Cell In[6], line 2
1 import torch2trt
----> 2 model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
File /usr/local/lib/python3.8/dist-packages/torch2trt-0.5.0-py3.8-linux-aarch64.egg/torch2trt/torch2trt.py:643, in torch2trt(module, inputs, input_names, output_names, log_level, fp16_mode, max_workspace_size, strict_type_constraints, keep_network, int8_mode, int8_calib_dataset, int8_calib_algorithm, use_onnx, default_device_type, dla_core, gpu_fallback, device_types, min_shapes, max_shapes, opt_shapes, onnx_opset, max_batch_size, avg_timing_iterations, **kwargs)
639 inputs_flat = input_flattener.flatten(inputs)
641 ctx.add_inputs(inputs_flat, input_names, dynamic_axes=dynamic_axes_flat)
→ 643 outputs = module(*inputs)
645 outputs_flat = output_flattener.flatten(outputs)
646 ctx.mark_outputs(outputs_flat, output_names)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1538, in Module._call_impl(self, *args, **kwargs)
1535 bw_hook = hooks.BackwardHook(self, full_backward_hooks, backward_pre_hooks)
1536 args = bw_hook.setup_input_hook(args)
→ 1538 result = forward_call(*args, **kwargs)
1539 if _global_forward_hooks or self._forward_hooks:
1540 for hook_id, hook in (
1541 *_global_forward_hooks.items(),
1542 *self._forward_hooks.items(),
1543 ):
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/container.py:217, in Sequential.forward(self, input)
215 def forward(self, input):
216 for module in self:
→ 217 input = module(input)
218 return input
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1538, in Module._call_impl(self, *args, **kwargs)
1535 bw_hook = hooks.BackwardHook(self, full_backward_hooks, backward_pre_hooks)
1536 args = bw_hook.setup_input_hook(args)
→ 1538 result = forward_call(*args, **kwargs)
1539 if _global_forward_hooks or self._forward_hooks:
1540 for hook_id, hook in (
1541 *_global_forward_hooks.items(),
1542 *self._forward_hooks.items(),
1543 ):
File /usr/local/lib/python3.8/dist-packages/trt_pose-0.0.1-py3.8-linux-aarch64.egg/trt_pose/models/common.py:76, in CmapPafHeadAttention.forward(self, x)
73 xp = self.paf_up(x)
74 ap = torch.tanh(self.paf_att(xp))
→ 76 return self.cmap_conv(xc * ac), self.paf_conv(xp * ap)
File /usr/local/lib/python3.8/dist-packages/torch2trt-0.5.0-py3.8-linux-aarch64.egg/torch2trt/torch2trt.py:262, in attach_converter..wrapper(*args, **kwargs)
259 ctx.method_str = method_str
261 # print('%s' % (converter.__name__,))
→ 262 converter["converter"](ctx)
264 # allow overwriting output, for things like shape converter
265 outputs = ctx.method_return
File /usr/local/lib/python3.8/dist-packages/torch2trt-0.5.0-py3.8-linux-aarch64.egg/torch2trt/converters/native_converters.py:1496, in convert_mul(ctx)
1494 output = ctx.method_return
1495 input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
→ 1496 input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape))
1497 layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.PROD)
1498 output._trt = layer.get_output(0)
File /usr/local/lib/python3.8/dist-packages/torch2trt-0.5.0-py3.8-linux-aarch64.egg/torch2trt/torch2trt.py:146, in broadcast_trt_tensors(network, trt_tensors, broadcast_ndim)
142 broadcasted_trt_tensors = [None] * len(trt_tensors)
144 for i, t in enumerate(trt_tensors):
→ 146 if len(t.shape) < broadcast_ndim:
147 # append 1 size dims to front
148 diff = broadcast_ndim - len(t.shape)
149 shape = tuple([1] * diff + list(t.shape))
ValueError: len() should return >= 0
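One thing I noticed about the numbers in the TRT error: TensorRT expects 512 * 4 * 4 * 512 = 4,194,304 weights, but the layer supplies exactly half of that. 2,097,152 is 512 * 256 * 4 * 4, which matches a ConvTranspose2d(512, 256, kernel_size=4) (the actual cmap_up deconvolution in resnet18_baseline_att, whose upsample channels default to 256), yet TensorRT was told the layer has 512 output channels. A quick check of the arithmetic:

# Weight count TensorRT expected, per the error message (assumes 512 output channels)
expected = 512 * 4 * 4 * 512          # 4194304
# Weight count actually supplied by the layer
observed = 2097152
print(512 * 256 * 4 * 4 == observed)  # True: matches ConvTranspose2d(512, 256, 4)
print(expected // 2 == observed)      # True: exactly half of what was expected

If that reading is right, the mismatch would come from the torch2trt deconvolution converter reporting the wrong output-channel count for transposed convolutions, not from the checkpoint itself.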
Why does this error occur? Is it caused by the TensorRT version? My JetPack version is 5.1.2, and the corresponding TensorRT version is 8.5.2.2.
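In case it helps, here is how I would check the exact stack and rule out a checkpoint/model mismatch; a plain-PyTorch forward pass should succeed if the weights fit the model, which would isolate the failure to the torch2trt conversion step:

import torch
import torchvision
import tensorrt

print('torch      :', torch.__version__)
print('torchvision:', torchvision.__version__)
print('tensorrt   :', tensorrt.__version__)

# If this forward pass works, the checkpoint matches the model and the
# failure is isolated to the torch2trt conversion.
data = torch.zeros((1, 3, 224, 224)).cuda()
with torch.no_grad():
    cmap, paf = model(data)
print(cmap.shape, paf.shape)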