Hello,
I have a python script that uses three gstreamer pipelines in OpenCV, the script runs well, I can capture images and process them but running the script for any extended (few min to few hours) period of time results in a kernel panic. Any idea on how I can fix this?
The SOM is a jetson nano production module, on custom carrier board with three CSI cameras (IMX219)
uname -a output:
Linux probe-v3 4.9.253-tegra #1 SMP PREEMPT Tue Aug 16 12:53:21 EDT 2022 aarch64 aarch64 aarch64 GNU/Linux
debug output:
[ 204.635156] Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 0
[ 204.642383] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.9.253-tegra #1
[ 204.648901] Hardware name: Probe V3 (DT)
[ 204.653683] Call trace:
[ 204.656132] [<ffffff800808ba40>] dump_backtrace+0x0/0x198
[ 204.661525] [<ffffff800808c004>] show_stack+0x24/0x30
[ 204.666573] [<ffffff8008f62cfc>] dump_stack+0xa0/0xc4
[ 204.671618] [<ffffff8008f5fda0>] panic+0x12c/0x2a8
[ 204.676405] [<ffffff80081819b4>] watchdog_check_hardlockup_other_cpu+0x11c/0x120
[ 204.683791] [<ffffff8008180b28>] watchdog_timer_fn+0x98/0x2c0
[ 204.689531] [<ffffff8008138f30>] __hrtimer_run_queues+0xd8/0x360
[ 204.695530] [<ffffff8008139880>] hrtimer_interrupt+0xa8/0x1e0
[ 204.701271] [<ffffff8008bfc100>] tegra210_timer_isr+0x38/0x48
[ 204.707009] [<ffffff8008121960>] __handle_irq_event_percpu+0x68/0x288
[ 204.713440] [<ffffff8008121ba8>] handle_irq_event_percpu+0x28/0x60
[ 204.719612] [<ffffff8008121c30>] handle_irq_event+0x50/0x80
[ 204.725178] [<ffffff8008125ac4>] handle_fasteoi_irq+0xd4/0x1c0
[ 204.731003] [<ffffff8008120914>] generic_handle_irq+0x34/0x50
[ 204.736741] [<ffffff8008121000>] __handle_domain_irq+0x68/0xc0
[ 204.742564] [<ffffff8008080d44>] gic_handle_irq+0x5c/0xb0
[ 204.747960] [<ffffff8008082c28>] el1_irq+0xe8/0x194
[ 204.752838] [<ffffff8008ba2300>] cpuidle_enter_state+0xb8/0x380
[ 204.758754] [<ffffff8008ba263c>] cpuidle_enter+0x34/0x48
[ 204.764062] [<ffffff80081113bc>] call_cpuidle+0x44/0x70
[ 204.769283] [<ffffff8008111738>] cpu_startup_entry+0x1b0/0x200
[ 204.775114] [<ffffff8008091cf8>] secondary_start_kernel+0x190/0x1f8
[ 204.781377] [<0000000084f701a8>] 0x84f701a8
[ 204.785560] SMP: stopping secondary CPUs
[ 205.913616] SMP: failed to stop secondary CPUs 0-1
[ 205.918409] Kernel Offset: disabled
[ 205.921897] Memory Limit: none
[ 205.934821] Rebooting in 5 seconds..
[ 210.938972] SMP: stopping secondary CPUs
[ 212.066665] SMP: failed to stop secondary CPUs 0-1
minimum python script to reproduce the issue:
#!/usr/bin/python3
import cv2
import json
import numpy as np
import os
import signal
import sys
import threading
import time
import zmq
from zmq_functions import *
from Focuser import *
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GLib, GObject, Gst, GstBase, GstVideo
Gst.init(None)
from subprocess import check_output
class camera:
def __init__(self, camNum, config, context):
self.config = config
self.camNum = camNum
self.camDev = config['sensor_id'][camNum]
self.running = True
self.thread = None
self.image = None
self.gstbuffer = None
self.output = None
self.sink_size = None
self.lens_bus = config['lens_i2c_bus'][camNum]
self.lens = None
self.lock = threading.RLock()
self.ipc = context.socket(zmq.REP)
self.ipc.bind(config['ipc_file'][camNum])
print("ZeroMQ IPC Socket bound.")
self.poller = zmq.Poller()
self.poller.register(self.ipc, zmq.POLLIN)
def open_camera(self):
pipeline = self.define_pipeline_jetson()
self.pipeline = Gst.parse_launch(pipeline)
self.lens = Focuser(self.lens_bus)
# Set up refferace to the camera so that we can change the image settings,
self.mipi_camera = self.pipeline.get_by_name('src_camera')
# Set up appsink for image src for opencv
self.appsink = self.pipeline.get_by_name('appsink')
self.appsink.connect("new-sample", self.on_new_sample)
self.appsinkValve = self.pipeline.get_by_name('appsinkValve')
# Set up bus to catch messages
self.bus = self.pipeline.get_bus()
self.bus.add_signal_watch()
self.bus.connect('message', self.on_bus_message)
print("%s: Camera opened." %(self.camDev))
def define_pipeline_jetson(self):
pipeline = ('nvarguscamerasrc name=src_camera sensor_id=%d tnr-strength=1 tnr-mode=2 ! ' \
'video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, framerate=(fraction)%d/1 ! ' \
'valve name=appsinkValve drop=false ! nvvidconv ! ' \
'video/x-raw, ' \
'format=(string)BGRx ! videoconvert ! video/x-raw, ' \
'format=(string)BGR ! appsink name=appsink emit-signals=True' %(self.camDev, self.config['frameSize'][0], self.config['frameSize'][1], self.config['frameRate']))
print('Gstreamer pipeline: %s' %pipeline)
return(pipeline)
def setup_camera(self):
self.open_camera()
self.resume_pipeline()
self.stop_app_sink()
def start_app_sink(self):
print("Enabling appsink stream.")
if self.mipi_camera is not None:
self.appsinkValve.set_property("drop", False)
def stop_app_sink(self):
print("Disabling appsink stream.")
if self.mipi_camera is not None:
self.appsinkValve.set_property("drop", True)
def shutdown_pipeline(self):
self.pipeline.set_state(Gst.State.NULL)
def resume_pipeline(self):
print("Resuming pipeline.")
if self.pipeline is not None:
self.pipeline.set_state(Gst.State.PLAYING)
self.pipeline.get_state(Gst.CLOCK_TIME_NONE)
def on_bus_message(self, bus, message):
t = message.type
if t == Gst.MessageType.EOS:
Gtk.main_quit()
elif t == Gst.MessageType.WARNING:
err, debug = message.parse_warning()
sys.stderr.write('Warning: %s: %s\n' % (err, debug))
elif t == Gst.MessageType.ERROR:
err, debug = message.parse_error()
sys.stderr.write('GST_Error: %s: %s\n' % (err, debug))
Gtk.main_quit()
return True
def on_new_sample(self, sink):
print("GOT NEW SAMPLE")
sample = sink.emit('pull-sample')
caps = sample.get_caps()
# Extract the width and height info from the sample's caps
height = caps.get_structure(0).get_value("height")
width = caps.get_structure(0).get_value("width")
# Get the actual data
buffer = sample.get_buffer()
# Get read access to the buffer data
success, map_info = buffer.map(Gst.MapFlags.READ)
if not success:
raise RuntimeError("Could not map buffer data!")
numpy_frame = np.ndarray(
shape=(height, width, 3),
dtype=np.uint8,
buffer=map_info.data)
self.image = numpy_frame
# Clean up the buffer mapping
buffer.unmap(map_info)
return Gst.FlowReturn.OK
def poll_for_reqs(self):
socks = dict(self.poller.poll(0))
if self.ipc in socks and socks[self.ipc] == zmq.POLLIN:
message = self.ipc.recv_json(zmq.NOBLOCK)
print("%s: GOT MESSAGE %s" %(self.camDev, message['request']))
if message['request'] == 'take_sample':
result = self.take_sample(message)
self.ipc.send_json(result)
elif message['request'] == 'shutdown':
self.shutdown_pipeline()
running = False
self.ipc.send_json({"success": True, "message": "Done"})
else:
self.ipc.send_json({"success": False, "message": "request not valid"})
def start_thread(self):
while self.running:
self.poll_for_reqs()
time.sleep(.05)
def take_sample(self, message):
#Set up
success = False
data = None
start_time = time.time()
frame = None
file_name = message['name'] + str(self.camNum) + ".jpg"
print("%s: Running sample image capture." % self.camDev)
w, h = self.config['frameSize']
self.start_app_sink()
self.image = None
print("%s: waititng for camera to focus: center laplace method" % self.camDev)
s = 150
focus = 0
max_focus = 0
max_laplace = 0
focus_counter = 0
for focus in range(0,1000,50):
self.lens.set(Focuser.OPT_FOCUS,focus)
frame = self.image
if frame is not None:
img = frame[int(h/2-s/2):int(h/2+s/2), int(w/2-s/2):int(w/2+s/2)]
current_laplace = cv2.Laplacian(img,cv2.CV_16U).var()
max_laplace, max_focus, focus_counter = (current_laplace, focus, 0) if current_laplace > max_laplace else (max_laplace, max_focus, focus_counter+1)
print("%s: focus: %d, laplace: %d" %(self.camDev, focus, current_laplace))
if focus_counter > 5:
break
else:
print("Fame is none")
frame_attempt = 0
while self.running and frame_attempt <=10:
frame = self.image
if frame is not None:
self.stop_app_sink()
print("%s: Saving image %s" %(self.camDev, file_name))
cv2.imwrite(file_name, frame)
success = True
break
else:
print("%s: Frame is None." %(self.camDev))
time.sleep(1)
frame_attempt +=1
success = False
print("%s: Done with sample capture: %s s" %(self.camDev, time.time()-start_time))
return {"success": success, "message": file_name}
def stop(self):
self.running = False
self.shutdown_pipeline()
class fastCam:
def setUpCams(self, camList, config):
cameras = []
for camNum, i in enumerate(camList):
if i != "":
cam = camera(camNum, config, self.context)
is_open = cam.setup_camera()
if True:
cameras.append(cam)
self.threads.append(threading.Thread(target=cam.start_thread))
self.ipc_sockets.append(zmq_req_rep_socket(self.context, config['ipc_file'][camNum]))
for t in self.threads:
t.start()
return(cameras)
def __init__(self, config, context):
self.config = config
self.threads = []
self.ipc_sockets = []
self.context = context
print("Camera Server : Setting up cameras")
self.cams = tuple(self.setUpCams(self.config['sensor_id'], self.config))
def __del__(self):
try:
print("Camera Server : Closing cameras.")
self.stop()
except Exception as inst:
app_log.error(inst)
def stop(self):
print("Camera Server : Stopping all threads")
for cam in self.cams:
cam.stop()
if self.threads is not None:
for t in self.threads:
t.join()
print("All threads have joined")
self.threads = []
def take_sample(self):
message = {
"request" : "take_sample",
"name" : "./sample-image-"
}
for socket in self.ipc_sockets:
socket.send(message)
success = []
data = []
for socket in self.ipc_sockets:
_success, _data = socket.receive()
success.append(_success)
data.append(_data)
print("Camera Server : All camera threads have responded")
print("Camera Server : Success: %s" %success)
print("Camera Server : Data: %s" %data)
return success, data
class server:
def receive_signal(self, signum, stack):
print("got trigger value: %s" %signum)
if signum == signal.SIGUSR1:
self.camObj.take_sample()
elif signum == signal.SIGQUIT:
self.stop = True
def __init__(self):
signal.signal(signal.SIGUSR1, self.receive_signal)
signal.signal(signal.SIGQUIT, self.receive_signal)
print("Signals registered.")
self.context = zmq.Context()
self.stop = False
self.config = {
"sensor_id": [
0,
1,
2
],
"lens_i2c_bus": [
7,
8,
9
],
"ipc_file": [
"ipc:///tmp/camera_ipc_0",
"ipc:///tmp/camera_ipc_1",
"ipc:///tmp/camera_ipc_2"
],
"frameSize":[
3264,
1848
],
"frameRate": 15
}
self.camObj = fastCam(self.config, self.context)
def run(self):
while not self.stop:
time.sleep(.25)
if self.camObj is not None:
self.camObj.stop()
if __name__ == "__main__":
server = server()
server.run()
print("Camera server is done exiting")
in a separate terminal window:
while true; do sudo pkill -USR1 minimum_camera; sleep 10; done