Script runs but doesn't display images

Hi all, I am trying to run the script detect_video.py from the page NVIDIA Jetson and Raspberry Pi: JetsonNano - NVIDIA AI IOT - Human Pose estimation using TensorRT (bottom of the page). When I add cv2.imshow("Video", img) inside the while loop, the window is never displayed, although the frames-per-second figure is printed to my terminal. The MP4 file is saved correctly, and the result is good, with the skeleton drawn by the algorithm. Why can't I display the images from my camera directly? Thanks for your help.

I also get several warning messages. The first is:

```
Matplotlib created a temporary config/cache directory at /tmp/matplotlib-wz7cf4ja because the default path (/home/vpl/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
```
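As far as I can tell, this first message is only a warning, and it already suggests a workaround: setting MPLCONFIGDIR to a writable directory before Matplotlib is first imported (trt_pose pulls it in indirectly). A minimal sketch of what I mean, assuming /tmp is writable (the directory name is arbitrary):

```python
import os

# Point Matplotlib's config/cache at a writable directory before
# anything imports matplotlib (the directory name here is arbitrary).
os.environ['MPLCONFIGDIR'] = '/tmp/matplotlib-cache'
os.makedirs(os.environ['MPLCONFIGDIR'], exist_ok=True)
```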

The second is:

```
[ WARN:0] global /home/nvidia/host/build_opencv/nv_opencv/modules/videoio/src/cap_gstreamer.cpp (933) open OpenCV | GStreamer warning: Cannot query video position: status=0, value=-1, duration=-1
```

Here is the full script:

```python
import json
import trt_pose.coco
import trt_pose.models
import torch
import torch2trt
from torch2trt import TRTModule
import time, sys
import cv2
import torchvision.transforms as transforms
import PIL.Image
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects
import argparse
import os.path

def get_keypoint(humans, hnum, peaks):
    # check invalid human index
    kpoint = []
    human = humans[0][hnum]
    C = human.shape[0]
    for j in range(C):
        k = int(human[j])
        if k >= 0:
            peak = peaks[0][j][k]  # peak[1]:width, peak[0]:height
            peak = (j, float(peak[0]), float(peak[1]))
            kpoint.append(peak)
            #print('index:%d : success [%5.3f, %5.3f]'%(j, peak[1], peak[2]) )
        else:
            peak = (j, None, None)
            kpoint.append(peak)
            #print('index:%d : None %d'%(j, k) )
    return kpoint

parser = argparse.ArgumentParser(description='TensorRT pose estimation run')
parser.add_argument('--model', type=str, default='resnet', help='resnet or densenet')
args = parser.parse_args()

with open('human_pose.json', 'r') as f:
    human_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(human_pose)
num_parts = len(human_pose['keypoints'])
num_links = len(human_pose['skeleton'])

if 'resnet' in args.model:
    print('------ model = resnet--------')
    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
    WIDTH = 224
    HEIGHT = 224
else:
    print('------ model = densenet--------')
    MODEL_WEIGHTS = 'densenet121_baseline_att_256x256_B_epoch_160.pth'
    OPTIMIZED_MODEL = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
    model = trt_pose.models.densenet121_baseline_att(num_parts, 2 * num_links).cuda().eval()
    WIDTH = 256
    HEIGHT = 256

data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

# Convert the PyTorch model to TensorRT the first time, then reuse the cached engine
if not os.path.exists(OPTIMIZED_MODEL):
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

# Warm up and benchmark pure inference over 50 runs
t0 = time.time()
torch.cuda.current_stream().synchronize()
for i in range(50):
    y = model_trt(data)
torch.cuda.current_stream().synchronize()
t1 = time.time()
print(50.0 / (t1 - t0))

mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
device = torch.device('cuda')

def preprocess(image):
    global device
    device = torch.device('cuda')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = PIL.Image.fromarray(image)
    image = transforms.functional.to_tensor(image).to(device)
    image.sub_(mean[:, None, None]).div_(std[:, None, None])
    return image[None, ...]

def execute(img, src, t):
    color = (0, 255, 0)
    data = preprocess(img)
    cmap, paf = model_trt(data)
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)  #, cmap_threshold=0.15, link_threshold=0.15)
    fps = 1.0 / (time.time() - t)
    for i in range(counts[0]):
        keypoints = get_keypoint(objects, i, peaks)
        for j in range(len(keypoints)):
            if keypoints[j][1]:
                x = round(keypoints[j][2] * WIDTH * X_compress)
                y = round(keypoints[j][1] * HEIGHT * Y_compress)
                cv2.circle(src, (x, y), 3, color, 2)
                cv2.putText(src, "%d" % int(keypoints[j][0]), (x + 5, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1)
                cv2.circle(src, (x, y), 3, color, 2)
    print("FPS:%f "%(fps))
    #draw_objects(img, counts, objects, peaks)
    cv2.putText(src, "FPS: %f" % (fps), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1)
    out_video.write(src)

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

ret_val, img = cap.read()
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc, cap.get(cv2.CAP_PROP_FPS), (640, 480))
count = 0

# Scale factors from network resolution back to the 640x480 capture size
X_compress = 640.0 / WIDTH * 1.0
Y_compress = 480.0 / HEIGHT * 1.0

if not cap.isOpened():
    print("Camera Open Error")
    sys.exit(0)

parse_objects = ParseObjects(topology)
draw_objects = DrawObjects(topology)

while cap.isOpened() and count < 500:
    t = time.time()
    ret_val, dst = cap.read()
    if not ret_val:
        print("Camera read Error")
        break

    img = cv2.resize(dst, dsize=(WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
    execute(img, dst, t)
    cv2.imshow("Video", img)
    count += 1

cv2.destroyAllWindows()
out_video.release()
cap.release()
```
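For reference, I launch it with `python3 detect_video.py --model resnet` (the `--model` flag matches the argparse option above; `resnet` is the default).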

Hi,

Could you add a waitKey() call to force OpenCV to display the window immediately?

```python
cv2.imshow("Video", img)
cv2.waitKey(0)
```
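In a continuous capture loop, a short timeout such as cv2.waitKey(1) keeps the video playing instead of pausing on every frame. Note also that your loop shows img, the resized network input, while the skeleton is drawn on dst (the frame written to the MP4), so displaying dst should show the overlay. A minimal sketch of the loop with both changes, using the same variables as in your script:

```python
while cap.isOpened() and count < 500:
    t = time.time()
    ret_val, dst = cap.read()
    if not ret_val:
        break

    img = cv2.resize(dst, dsize=(WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
    execute(img, dst, t)
    cv2.imshow("Video", dst)               # dst carries the drawn skeleton; img is the raw network input
    if cv2.waitKey(1) & 0xFF == ord('q'):  # 1 ms pump for the GUI event loop; press 'q' to quit early
        break
    count += 1
```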

Thanks.