hi jerryChang:
After debugging, I found that this is not an application layer issue, but a concurrency problem related to the rtcpu and kernel.
I added the following debug prints to the kernel to help locate the issue.
diff --git a/drivers/media/platform/tegra/camera/fusa-capture/capture-vi.c b/drivers/media/platform/tegra/camera/fusa-capture/capture-vi.c
index 0b9f1e8..3f61c1c 100644
--- a/drivers/media/platform/tegra/camera/fusa-capture/capture-vi.c
+++ b/drivers/media/platform/tegra/camera/fusa-capture/capture-vi.c
@@ -288,7 +288,7 @@ static void vi_capture_ivc_status_callback(
struct vi_capture *capture = (struct vi_capture *)pcontext;
struct tegra_vi_channel *chan = capture->vi_channel;
uint32_t buffer_index;
-
+ //printk("%s", __func__);
if (unlikely(capture == NULL)) {
dev_err(chan->dev, "%s: invalid context", __func__);
return;
@@ -419,7 +419,8 @@ static void vi_capture_ivc_control_callback(
dev_err(chan->dev, "%s: invalid response", __func__);
return;
}
-
+
+ printk("%s icontrol_msg->header.channel_id=%d control_msg->channel_setup_resp.channel_id=%d ",__func__,control_msg->header.channel_id,control_msg->channel_setup_resp.channel_id);
switch (control_msg->header.msg_id) {
case CAPTURE_CHANNEL_SETUP_RESP:
case CAPTURE_CHANNEL_RESET_RESP:
diff --git a/drivers/media/platform/tegra/camera/vi/vi5_fops.c b/drivers/media/platform/tegra/camera/vi/vi5_fops.c
index d03b9b7..2ca5b52 100644
--- a/drivers/media/platform/tegra/camera/vi/vi5_fops.c
+++ b/drivers/media/platform/tegra/camera/vi/vi5_fops.c
@@ -268,7 +268,7 @@ static int vi5_channel_open(struct tegra_channel *chan, u32 vi_port)
while (!found) {
sprintf(chanFilePath, "%s%u", VI_CHANNEL_DEV, channel);
-
+ printk(" %s try open /dev/capture-vi-channel%d ",__func__, channel);
filp = filp_open(chanFilePath, O_RDONLY, 0);
if (IS_ERR(filp)) {
diff --git a/drivers/platform/tegra/rtcpu/capture-ivc.c b/drivers/platform/tegra/rtcpu/capture-ivc.c
index 14e1ba3..30c4fae 100644
--- a/drivers/platform/tegra/rtcpu/capture-ivc.c
+++ b/drivers/platform/tegra/rtcpu/capture-ivc.c
@@ -154,7 +154,7 @@ int tegra_capture_ivc_register_control_cb(
*trans_id = (uint32_t)ctx_id;
cb_ctx->cb_func = control_resp_cb;
cb_ctx->priv_context = priv_context;
-
+ printk("%s trans_id=%d ", __func__,*trans_id);
mutex_unlock(&civc->cb_ctx_lock);
return 0;
@@ -181,7 +181,7 @@ int tegra_capture_ivc_notify_chan_id(uint32_t chan_id, uint32_t trans_id)
chan_id = array_index_nospec(chan_id, NUM_CAPTURE_CHANNELS);
trans_id = array_index_nospec(trans_id, TOTAL_CHANNELS);
-
+ printk("%s chan_id=%d trans_id=%d ", __func__,chan_id,trans_id);
civc = __scivc_control;
mutex_lock(&civc->cb_ctx_lock);
@@ -231,7 +231,7 @@ int tegra_capture_ivc_register_capture_cb(
"invalid channel id %u", chan_id))
return -EINVAL;
chan_id = array_index_nospec(chan_id, NUM_CAPTURE_CHANNELS);
-
+
if (!__scivc_capture)
return -ENODEV;
@@ -248,7 +248,7 @@ int tegra_capture_ivc_register_capture_cb(
ret = -EBUSY;
goto fail;
}
-
+ printk("%s chan_id=%d", __func__,chan_id);
civc->cb_ctx[chan_id].cb_func = capture_status_ind_cb;
civc->cb_ctx[chan_id].priv_context = priv_context;
mutex_unlock(&civc->cb_ctx_lock);
@@ -273,7 +273,7 @@ int tegra_capture_ivc_unregister_control_cb(uint32_t id)
return -ENODEV;
id = array_index_nospec(id, TOTAL_CHANNELS);
-
+ printk("%s id=%d", __func__,id);
civc = __scivc_control;
mutex_lock(&civc->cb_ctx_lock);
@@ -309,7 +309,7 @@ EXPORT_SYMBOL(tegra_capture_ivc_unregister_control_cb);
int tegra_capture_ivc_unregister_capture_cb(uint32_t chan_id)
{
struct tegra_capture_ivc *civc;
-
+ printk("%s chan_id=%d line =%d", __func__,chan_id,__LINE__);
if (chan_id >= NUM_CAPTURE_CHANNELS)
return -EINVAL;
@@ -317,7 +317,6 @@ int tegra_capture_ivc_unregister_capture_cb(uint32_t chan_id)
return -ENODEV;
chan_id = array_index_nospec(chan_id, NUM_CAPTURE_CHANNELS);
-
civc = __scivc_capture;
mutex_lock(&civc->cb_ctx_lock);
@@ -380,7 +379,7 @@ static inline void tegra_capture_ivc_recv(struct tegra_capture_ivc *civc)
id = hdr->channel_id;
trace_capture_ivc_recv(dev_name(dev), hdr->msg_id, id);
-
+ //printk("%s hdr->channel_id=%d", __func__,hdr->channel_id);
/* Check if message is valid */
if (id < TOTAL_CHANNELS) {
id = array_index_nospec(id, TOTAL_CHANNELS);
@@ -437,7 +436,7 @@ static int tegra_capture_ivc_probe(struct tegra_ivc_channel *chan)
const char *service;
int ret;
uint32_t i;
-
+ printk("%s->%d" , __func__, __LINE__);
civc = devm_kzalloc(dev, (sizeof(*civc)), GFP_KERNEL);
if (unlikely(civc == NULL))
return -ENOMEM;
I will discuss this issue through three experiments.
The three experiments are: a single stream accessing a non-existent camera; three streams accessing non-existent cameras, started 0.5 seconds apart; and two streams accessing non-existent cameras concurrently.
Through these three experiments, I have roughly understood the cause of the problem.
First experiment: normal access to a single non-existent camera.
I executed the following command.
v4l2-ctl -V --set-fmt-video=width=1280,height=720,pixelformat='YUYV' --set-ctrl bypass_mode=0 --stream-mmap --stream-count=100000 -d /dev/video0
The relevant log name is dmesg_one_camera.log.
dmesg_one_camera.log (104.7 KB)
In this case, the values of control_msg->header.channel_id and control_msg->channel_setup_resp.channel_id are equal, as expected.
Next is the second experiment.
The script I executed is shown below; it starts streams on three non-existent cameras, 0.5 seconds apart.
#!/bin/bash
# Reproduction script: for up to 30000 iterations, start v4l2-ctl streaming on
# /dev/video0, /dev/video1 and /dev/video2 as background jobs, then wait for
# all of them to exit before the next iteration.
for ((count=1; count<=30000; count++)); do
echo "Executing iteration $count"
# The inner loop runs twice per iteration (seq 0 1); $i is not used in its body,
# so each outer iteration launches the three commands below two times.
for i in $(seq 0 1); do
v4l2-ctl -V --set-fmt-video=width=1280,height=720,pixelformat='YUYV' --set-ctrl bypass_mode=0 --stream-mmap --stream-count=100000 -d /dev/video0 &
# 0.5 s pause between launching the streams on video0 and video1.
sleep 0.5
v4l2-ctl -V --set-fmt-video=width=1280,height=720,pixelformat='YUYV' --set-ctrl bypass_mode=0 --stream-mmap --stream-count=100000 -d /dev/video1 &
# 0.5 s pause between launching the streams on video1 and video2.
sleep 0.5
v4l2-ctl -V --set-fmt-video=width=1280,height=720,pixelformat='YUYV' --set-ctrl bypass_mode=0 --stream-mmap --stream-count=100000 -d /dev/video2 &
done
wait # Wait for all background jobs to finish
done
The log name is dmesg_three_0.5s.log.
dmesg_three_0.5s.log (398.1 KB)
It can be seen that the control_msg->channel_setup_resp.channel_id returned by the rtcpu is the control_msg->header.channel_id of the previous IVC message sent by the rtcpu.
Then comes the final experiment.
The executed script is as follows,
#!/bin/bash
# Reproduction script: for up to 30000 iterations, launch two ./a.out instances
# as background jobs with no delay between them, one on /dev/video0 and one on
# /dev/video1, then wait for both to exit before the next iteration.
for ((count=1; count<=30000; count++)); do
echo "Executing iteration $count"
# seq 0 1 yields 0 and 1, selecting /dev/video0 and /dev/video1.
for i in $(seq 0 1); do
./a.out -d /dev/video$i &
done
wait # Wait for all background jobs to finish
done
The relevant pstore is pstore_two_camera.zip.
pstore_two_camera.zip (44.0 KB)
It can be seen that when the error occurs, the value channel_setup_resp.channel_id=0 is incorrect: the response carries the channel_id belonging to another thread.
At this point, an incorrect channel id can lead to a series of errors, ultimately resulting in a restart.