Vi2 CSI_A syncpt timeout

Hi support,

We are using the HDMI to CSI-2 Bridge (From a third party manufacturer) on a Jetson TK1.
The driver tc358743 is used for this card.

Infos:
Kernel version : 3.10.40 R21.5
tc358743 chip (HDMI 2 CSI bridge).

After some minutes of grabbing V4L frames, the following logs appears on kernel log:

Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.273041] vi vi.0: wait: CSI_A syncpt timeout, syncpt = 287584, err = -11
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.281231] TEGRA_CSI_CSI_CIL_A_STATUS 0x00000012
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.286446] TEGRA_CSI_CSI_CILA_STATUS 0x00060060
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.292137] TEGRA_CSI_CSI_CIL_B_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.297282] TEGRA_CSI_CSI_CIL_C_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.302831] TEGRA_CSI_CSI_CIL_D_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.307930] TEGRA_CSI_CSI_CIL_E_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.313422] TEGRA_CSI_CSI_PIXEL_PARSER_A_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.319315] TEGRA_CSI_CSI_PIXEL_PARSER_B_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.325846] TEGRA_VI_CSI_0_ERROR_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.331024] TEGRA_VI_CSI_1_ERROR_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.535982] vi vi.0: start: CSI_A syncpt timeout, syncpt = 287585, err = -11
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.543439] TEGRA_CSI_CSI_CIL_A_STATUS 0x00000012
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.548589] TEGRA_CSI_CSI_CILA_STATUS 0x00060060
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.553657] TEGRA_CSI_CSI_CIL_B_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.558793] TEGRA_CSI_CSI_CIL_C_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.563919] TEGRA_CSI_CSI_CIL_D_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.569022] TEGRA_CSI_CSI_CIL_E_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.574123] TEGRA_CSI_CSI_PIXEL_PARSER_A_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.579997] TEGRA_CSI_CSI_PIXEL_PARSER_B_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.585841] TEGRA_VI_CSI_0_ERROR_STATUS 0x00000000
Mar  8 13:33:09 qs-labo-001 kernel: [ 8448.591002] TEGRA_VI_CSI_1_ERROR_STATUS 0x00000000
...
...

In our software, we lose some frames due to this synchronization timeout.
After a look on the kernel, these logs are generated by these functions (vi2_capture_start and vi2_capture_wait).
The synchronization timeout is detected by the method nvhost_syncpt_wait_timeout_ext.

Here is the patch we applied on vi2.c in order to work with tc358743:

--- a/drivers/media/platform/soc_camera/tegra_camera/vi2.c
+++ b/drivers/media/platform/soc_camera/tegra_camera/vi2.c
@@ -335,6 +335,7 @@
 #define                CLKSELE         (1 << 21)
 
 #define MIPI_CAL_BASE  0x700e3000
+static void vi2_sw_reset(struct tegra_camera_dev *cam);
 
 static const struct regmap_config mipi_cal_config = {
        .reg_bits = 32,
@@ -609,8 +610,8 @@ static int vi2_capture_setup_csi_0(struct tegra_camera_dev *cam,
                        0x3 | (0x1 << 5) | (0x40 << 8));
 #endif
 
-       TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILA_CONTROL0, 0x9);
-       TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILB_CONTROL0, 0x9);
+       TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILA_CONTROL0, 0x49);
+       TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILB_CONTROL0, 0x49);
        TC_VI_REG_WT(cam, TEGRA_CSI_PIXEL_STREAM_PPA_COMMAND, 0xf007);
        TC_VI_REG_WT(cam, TEGRA_CSI_CSI_PIXEL_PARSER_A_INTERRUPT_MASK, 0x0);
        TC_VI_REG_WT(cam, TEGRA_CSI_PIXEL_STREAM_A_CONTROL0, 0x280301f0);
@@ -650,7 +651,9 @@ static int vi2_capture_setup_csi_0(struct tegra_camera_dev *cam,
                   (icd->current_fmt->code == V4L2_MBUS_FMT_VYUY8_2X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_YUYV8_2X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_YVYU8_2X8)) {
-               /* TBD */
+               format = TEGRA_IMAGE_FORMAT_T_U8_Y8__V8_Y8;
+               data_type = TEGRA_IMAGE_DT_YUV422_8;
+               image_size = icd->user_width * 2;
        } else if ((icd->current_fmt->code == V4L2_MBUS_FMT_SBGGR8_1X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_SGBRG8_1X8)) {
                format = TEGRA_IMAGE_FORMAT_T_L8;
@@ -663,8 +666,7 @@ static int vi2_capture_setup_csi_0(struct tegra_camera_dev *cam,
                image_size = (icd->user_width * 10) >> 3;
        }
 
-       TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_IMAGE_DEF,
-                       (cam->tpg_mode ? 0 : 1 << 24) | (format << 16) | 0x1);
+       TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_IMAGE_DEF, ((format << 16) | 0x1));
 
        TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_CSI_IMAGE_DT, data_type);
 
@@ -709,10 +711,10 @@ static int vi2_capture_setup_csi_1(struct tegra_camera_dev *cam,
 #endif
 
        if (pdata->port == TEGRA_CAMERA_PORT_CSI_B) {
-               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILC_CONTROL0, 0x9);
-               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILD_CONTROL0, 0x9);
+               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILC_CONTROL0, 0x49);
+               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILD_CONTROL0, 0x49);
        } else if (pdata->port == TEGRA_CAMERA_PORT_CSI_C)
-               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILE_CONTROL0, 0x9);
+               TC_VI_REG_WT(cam, TEGRA_CSI_PHY_CILE_CONTROL0, 0x49);
 
        TC_VI_REG_WT(cam, TEGRA_CSI_PIXEL_STREAM_PPB_COMMAND, 0xf007);
        TC_VI_REG_WT(cam, TEGRA_CSI_CSI_PIXEL_PARSER_B_INTERRUPT_MASK, 0x0);
@@ -756,7 +758,9 @@ static int vi2_capture_setup_csi_1(struct tegra_camera_dev *cam,
                   (icd->current_fmt->code == V4L2_MBUS_FMT_VYUY8_2X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_YUYV8_2X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_YVYU8_2X8)) {
-               /* TBD */
+               format = TEGRA_IMAGE_FORMAT_T_U8_Y8__V8_Y8;
+               data_type = TEGRA_IMAGE_DT_YUV422_8;
+               image_size = icd->user_width * 2;
        } else if ((icd->current_fmt->code == V4L2_MBUS_FMT_SBGGR8_1X8) ||
                   (icd->current_fmt->code == V4L2_MBUS_FMT_SGBRG8_1X8)) {
                format = TEGRA_IMAGE_FORMAT_T_L8;
@@ -769,8 +773,7 @@ static int vi2_capture_setup_csi_1(struct tegra_camera_dev *cam,
                image_size = icd->user_width * 10 / 8;
        }
 
-       TC_VI_REG_WT(cam, TEGRA_VI_CSI_1_IMAGE_DEF,
-                       (cam->tpg_mode ? 0 : 1 << 24) | (format << 16) | 0x1);
+       TC_VI_REG_WT(cam, TEGRA_VI_CSI_1_IMAGE_DEF, ((format << 16) | 0x1));
 
        TC_VI_REG_WT(cam, TEGRA_VI_CSI_1_CSI_IMAGE_DT, data_type);
 
@@ -957,7 +960,7 @@ static void vi2_capture_error_status(struct tegra_camera_dev *cam)
 }
 
 static int vi2_capture_start(struct tegra_camera_dev *cam,
-                            struct tegra_camera_buffer *buf)
+                                     struct tegra_camera_buffer *buf)
 {
        struct soc_camera_device *icd = buf->icd;
        struct soc_camera_subdev_desc *ssdesc = &icd->sdesc->subdev_desc;
@@ -970,6 +973,7 @@ static int vi2_capture_start(struct tegra_camera_dev *cam,
        if (err < 0)
                return err;
 
+       /* Only wait on CSI frame end syncpt if we're using CSI. */
        if (port == TEGRA_CAMERA_PORT_CSI_A) {
                if (!nvhost_syncpt_read_ext_check(cam->ndev,
                                        cam->syncpt_id_csi_a, &val))
@@ -980,6 +984,12 @@ static int vi2_capture_start(struct tegra_camera_dev *cam,
                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_CSI_PPA_FRAME_START | cam->syncpt_id_csi_a);
                TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_SINGLE_SHOT, 0x1);
+               err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
+                               cam->syncpt_id_csi_a,
+                               cam->syncpt_csi_a,
+                               TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
+                               NULL,
+                               NULL);
        } else if (port == TEGRA_CAMERA_PORT_CSI_B ||
                        port == TEGRA_CAMERA_PORT_CSI_C) {
                if (!nvhost_syncpt_read_ext_check(cam->ndev,
@@ -991,8 +1001,30 @@ static int vi2_capture_start(struct tegra_camera_dev *cam,
                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_CSI_PPB_FRAME_START | cam->syncpt_id_csi_b);
                TC_VI_REG_WT(cam, TEGRA_VI_CSI_1_SINGLE_SHOT, 0x1);
+               err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
+                               cam->syncpt_id_csi_b,
+                               cam->syncpt_csi_b,
+                               TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
+                               NULL,
+                               NULL);
        }
 
+       /* Mark SOF flag to Zero after we captured the FIRST frame */
+       if (cam->sof)
+               cam->sof = 0;
+       /* Capture syncpt timeout err, then dump error status */
+       if (err) {
+               if (port == TEGRA_CAMERA_PORT_CSI_A)
+                       dev_err(&cam->ndev->dev,
+                               "start: CSI_A syncpt timeout, syncpt = %d, err = %d\n",
+                               cam->syncpt_csi_a, err);
+               else if (port == TEGRA_CAMERA_PORT_CSI_B ||
+                               port == TEGRA_CAMERA_PORT_CSI_C)
+                       dev_err(&cam->ndev->dev,
+                               "start: CSI_B/CSI_C syncpt timeout, syncpt = %d, err = %d\n",
+                               cam->syncpt_csi_b, err);
+               vi2_capture_error_status(cam);
+       }
        return err;
 }
 
@@ -1031,12 +1063,12 @@ static int vi2_capture_wait(struct tegra_camera_dev *cam,
        if (err) {
                if (port == TEGRA_CAMERA_PORT_CSI_A)
                        dev_err(&cam->ndev->dev,
-                               "CSI_A syncpt timeout, syncpt = %d, err = %d\n",
+                               "wait: CSI_A syncpt timeout, syncpt = %d, err = %d\n",
                                cam->syncpt_csi_a, err);
                else if (port == TEGRA_CAMERA_PORT_CSI_B ||
                                port == TEGRA_CAMERA_PORT_CSI_C)
                        dev_err(&cam->ndev->dev,
-                               "CSI_B/CSI_C syncpt timeout, syncpt = %d, err = %d\n",
+                               "wait: CSI_B/CSI_C syncpt timeout, syncpt = %d, err = %d\n",
                                cam->syncpt_csi_b, err);
                vi2_capture_error_status(cam);
        }
@@ -1107,13 +1139,83 @@ static int vi2_capture_done(struct tegra_camera_dev *cam, int port)
 
 static int vi2_capture_stop(struct tegra_camera_dev *cam, int port)
 {
-       u32 reg = (port == TEGRA_CAMERA_PORT_CSI_A) ?
-                 TEGRA_CSI_PIXEL_STREAM_PPA_COMMAND :
-                 TEGRA_CSI_PIXEL_STREAM_PPB_COMMAND;
+       u32 val;
+       int err = 0;
 
-       TC_VI_REG_WT(cam, reg, 0xf002);
+       if (port == TEGRA_CAMERA_PORT_CSI_A) {
+               if (!nvhost_syncpt_read_ext_check(cam->ndev,
+                                                 cam->syncpt_id_csi_a, &val))
+                       cam->syncpt_csi_a = nvhost_syncpt_incr_max_ext(
+                                               cam->ndev,
+                                               cam->syncpt_id_csi_a, 1);
 
-       return 0;
+               /*
+                * Make sure recieve VI_MWA_ACK_DONE of the last frame before
+                * stop and dequeue buffer, otherwise MC error will shows up
+                * for the last frame.
+                */
+               TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
+                            VI_MWA_ACK_DONE | cam->syncpt_id_csi_a);
+
+               /*
+                * Ignore error here and just stop pixel parser after waiting,
+                * even if it's timeout
+                */
+               err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
+                               cam->syncpt_id_csi_a,
+                               cam->syncpt_csi_a,
+                               TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
+                               NULL,
+                               NULL);
+               TC_VI_REG_WT(cam, TEGRA_CSI_PIXEL_STREAM_PPA_COMMAND, 0xf002);
+       } else if (port == TEGRA_CAMERA_PORT_CSI_B ||
+                       port == TEGRA_CAMERA_PORT_CSI_C) {
+               if (!nvhost_syncpt_read_ext_check(cam->ndev,
+                                                 cam->syncpt_id_csi_b, &val))
+                       cam->syncpt_csi_b = nvhost_syncpt_incr_max_ext(
+                                               cam->ndev,
+                                               cam->syncpt_id_csi_b, 1);
+
+               /*
+                * Make sure recieve VI_MWB_ACK_DONE of the last frame before
+                * stop and dequeue buffer, otherwise MC error will shows up
+                * for the last frame.
+                */
+               TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
+                            VI_MWB_ACK_DONE | cam->syncpt_id_csi_b);
+
+               /*
+                * Ignore error here and just stop pixel parser after waiting,
+                * even if it's timeout
+                */
+               err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
+                               cam->syncpt_id_csi_b,
+                               cam->syncpt_csi_b,
+                               TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
+                               NULL,
+                               NULL);
+               TC_VI_REG_WT(cam, TEGRA_CSI_PIXEL_STREAM_PPB_COMMAND, 0xf002);
+       }
+
+       if (err){
+               if (port == TEGRA_CAMERA_PORT_CSI_A)
+                       dev_err(&cam->ndev->dev,
+                       "stop: CSI_A syncpt timeout, syncpt = %d, err = %d\n",
+                       cam->syncpt_csi_a, err);
+               else if (port == TEGRA_CAMERA_PORT_CSI_B ||
+                               port == TEGRA_CAMERA_PORT_CSI_C)
+                       dev_err(&cam->ndev->dev,
+                       "stop: CSI_B/CSI_C syncpt timeout, syncpt = %d, err = %d\n",
+                               cam->syncpt_csi_b, err);
+               vi2_capture_error_status(cam);
+               vi2_sw_reset(cam);
+               vi2_free_syncpts(cam);
+               vi2_init_syncpts(cam);
+       }
+       // set sof to 1 so that when streamon is again called, CSI is configured again
+       cam->sof = 1;
+
+       return err;
 }
 
 /* Reset VI2/CSI2 when activating, no sepecial ops for deactiving  */

Questions:

  1. Is it a known issue on kernel 3.10.40 R21.5 ?
  2. If it is a known issue, is there an available kernel patch to solve this problem ?

Here is the log when the vi2 is initialized:

Mar  8 09:47:41 qs-labo-001 kernel: [  178.706891] tegra_camera: version magic '3.10.40-gafe5771-dirty SMP preempt mod_unload ARMv7 p2v8 ' should be '3.10.40-g83c7c62-dirty SMP preempt mod_unload ARMv7 p2v8 '
Mar  8 09:47:58 qs-labo-001 kernel: [  195.626195] vi vi.0: initialized
Mar  8 09:47:58 qs-labo-001 kernel: [  195.626246] soc-camera-pdrv soc-camera-pdrv.4: Probing soc-camera-pdrv.4
Mar  8 09:47:58 qs-labo-001 kernel: [  195.634135] tc358743_probe : CHIPID=0x00 REVISIONID=0x00
Mar  8 09:47:59 qs-labo-001 kernel: [  197.248559] Write EDID: 256 (1)
Mar  8 09:48:00 qs-labo-001 kernel: [  197.367689] Activate EDID Mode DDC2B 1 0
Mar  8 09:48:00 qs-labo-001 kernel: [  197.369093] tc358743_enable_interrupts: cable connected = 1
Mar  8 09:48:00 qs-labo-001 kernel: [  197.991056] tc358743_probe : force initial interrupt mask: 0x043f : Reset Needed ?
Mar  8 09:48:00 qs-labo-001 kernel: [  197.998711] tc358743_init_interrupts : Done
Mar  8 09:48:00 qs-labo-001 kernel: [  197.999302] tc358743_probe : IRQ 403 5V Present
Mar  8 09:48:00 qs-labo-001 kernel: [  197.999379] tc358743_probe : devm_request_threaded_irq succeed
Mar  8 09:48:00 qs-labo-001 kernel: [  197.999974] tc358743_enable_interrupts: cable connected = 1
Mar  8 09:48:00 qs-labo-001 kernel: [  198.005226] tc358743_g_edid : Dump EDID lenRead=256 Mode=2
Mar  8 09:48:00 qs-labo-001 kernel: [  198.006109] 00 ff ff ff ff ff ff 00 46 65 00 50 01 00 00 00 01
Mar  8 09:48:00 qs-labo-001 kernel: [  198.021077] 1a 01 03 a2 00 00 00 02 ee 95 a3 54 4c 99 26 0f
Mar  8 09:48:00 qs-labo-001 kernel: [  198.035165] 50 54 04 43 00 01 01 01 01 01 01 01 01 01 01 01
Mar  8 09:48:00 qs-labo-001 kernel: [  198.049241] 01 01 01 01 01 02 3a 80 18 71 38 2d 40 58 2c 45
Mar  8 09:48:00 qs-labo-001 kernel: [  198.063322] 00 c4 8e 21 00 00 1e 00 00 00 fc 00 51 53 20 45
Mar  8 09:48:00 qs-labo-001 kernel: [  198.077411] 4e 47 49 4e 45 20 48 49 44 00 00 00 fd 00 18 4b
Mar  8 09:48:00 qs-labo-001 kernel: [  198.091489] 0f 51 09 00 0a 20 20 20 20 20 20 00 00 00 fe 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.105581] 48 49 44 56 32 30 31 36 51 31 20 20 20 01 a1 02
Mar  8 09:48:00 qs-labo-001 kernel: [  198.119658] 03 16 f0 41 90 23 09 07 07 83 01 00 00 67 03 0c
Mar  8 09:48:00 qs-labo-001 kernel: [  198.133735] 00 00 00 b8 21 02 3a 80 18 71 38 2d 40 58 58 45
Mar  8 09:48:00 qs-labo-001 kernel: [  198.147810] 00 80 38 74 00 00 18 00 00 00 00 00 00 00 00 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.161913] 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.175990] 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.190082] 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.204164] 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Mar  8 09:48:00 qs-labo-001 kernel: [  198.218249] 00 00 00 00 00 00 00 00 00 00 00 00 00 70 84
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234663] Link status : Signal OK Sync OK  DVI PHY with DE PLL lock 5V Yes<5>[  198.234732] vi vi.0: Supporting mbus format code 0x2006 using YUV422 (UYVY) packed
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234737] vi vi.0: Supporting mbus format code 0x2006 using YUV422 (VYUY) packed
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234742] vi vi.0: Supporting mbus format code 0x2006 using YUV422 (YUYV) packed
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234747] vi vi.0: Supporting mbus format code 0x2006 using YUV422 (YVYU) packed
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234751] vi vi.0: Supporting mbus format code 0x2006 using YUV420 (YU12) planar
Mar  8 09:48:00 qs-labo-001 kernel: [  198.234756] vi vi.0: Supporting mbus format code 0x2006 using YVU420 (YV12) planar
Mar  8 09:48:00 qs-labo-001 kernel: [  198.235758] vi vi.0: Tegra camera driver loaded.
Mar  8 09:48:00 qs-labo-001 kernel: [  198.238851] vi vi.1: initialized
Mar  8 09:48:00 qs-labo-001 kernel: [  198.238928] soc-camera-pdrv soc-camera-pdrv.1: Probing soc-camera-pdrv.1
Mar  8 09:48:00 qs-labo-001 kernel: [  198.241462] tc358743_s_power : state = 1
Mar  8 09:48:00 qs-labo-001 kernel: [  198.245355] vi vi.1: Tegra camera driver loaded.
Mar  8 09:48:00 qs-labo-001 kernel: [  198.248932] Image is 1920x1080@60, ~1991 Gbps bandwidth (~996MHz/lane) [Wrong width]
Mar  8 09:48:00 qs-labo-001 kernel: [  198.248939] Wrong width (1920 vs 640)
Mar  8 09:48:00 qs-labo-001 kernel: [  198.254260] Setting pll to frequency 1075 (1075) MHz
Mar  8 09:48:00 qs-labo-001 kernel: [  198.264739] tc358743_enable_interrupts: cable connected = 1
Mar  8 09:48:00 qs-labo-001 kernel: [  198.267639] Starting CSI 2 LANS
Mar  8 09:48:01 qs-labo-001 kernel: [  198.280701] tc358743_s_power : state = 0

PS: Previously we used the kernel 3.10.40 R21.4 where tearing appears on V4L grabbed frames in some use case.
With the kernel version (3.10.40 R21.5), the tearing effect disappears or are not too obvious to see .

It’s not a known issue. The problem should cause by the sensor output MIPI timing cause. You may need to probe the sensor output signal to make sure the timing is all clear. The log shows the “Start of Transmission Multi Bit Error”

Hi,

When this timeout synchronization occurs, a V4L stream off (VIDIOC_STREAMOFF) followed by a stream ON (VIDIOC_STREAMON) restarts the V4L capture.
But after some minutes, this synchronization timeout appears again.

We notice that we did not get this problem on kernel 3.10.40 R21.4.

Why this problem is present in kernel 3.10.40 R21.5 and not on kernel 3.10.40 R21.4 ?

Best regards,

Hi
That’s interesting, however this release was verified by QA with reference sensor board before it public. From the log it show the start of transmission multi bit error that means you need to make sure the MIPI output need to follow the spec. First you need to probe and make sure THS-PERPARE and THS-ZERO are follow the MIPI spec. After that you can try to fine tune the ths_settle time it range it 1 - 15 (REG TEGRA_CSI_PHY_CILA_CONTROL0 bit 0:5)

Second try to config the sensor as discontinuous mode.

Hi Romary,

Have you tried our suggestions? Does that help on your case?

Thanks

Hello,

It seems that timeout VI/SYNCPT freeze may occurs on high IRQ load condition, when syncpt IRQs are missed .

We are still investiguating that hypothesis, that would conclude to a non hardware issue.

Hi Romary

  1. Could you try to boost the system to performance mode, you can reference to below link
    http://elinux.org/Jetson/Performance
  2. Rolling back the vi driver (soc_camera/tegra_camera) to 21.4 to check if any regression from the vi driver.

Hello,

Since migrating from 21.4 to 21.5, we still have VI Syncpt timeout issues, happening in a random manner (it occurs from 1h to 2 days of endurance test).

The bas new is that we lost the ability to print CSI status since 21.5

see : https://devtalk.nvidia.com/default/topic/1015105/jetson-tk1-read-csi-status-registry-value-causes-kernel-freeze/

We did many CPU / OS stress tests, with stress and stress-ng binaries, without beeing able to make the issue reproducible.

PLeas help us to restore the ability to print CSI CIL STATUS on error

1 Like

Romary

  1. If you can 100% make sure the 21.4 without problem, could you try rolling the VI driver back? And could you confirm the timeout issue only repo with run stress tools?

  2. Does the test process will open/close the camera? Can the test process only open and close until the test finish.

1- Running 21.4 is useless for us since it as sever defect on image integrity and image lost rate (on a 1080p60fps basis).

What’s more our application has changed since 21.5, because we added webcam support in our USB device gadget driver, and this specific change added 5000 irq/sec on the system.

It is not clear for us that stress test is a key factor.
Here is my best summary :
- 21.4 with our old application(no webcam) can hold 30 weeks of stable bad/useless grabbing (image lost).
- 21.5 without our application can hold 1 week of perfect grabbing.
- 21.5 with our application crash with syncpt timeout within a 12h endurance test.

2.- The application only does IOCTL once the camera device is open, using V4L2 api (select, dequeue queue). On syncpt timeout, we do a STREAMOFF => STREAMON sequence. should we do an close / open sequence ?

Again, since 21.5 we lost the ability to display CSI status register.

We are using perf event trace to debug the issue:

In normal running condition, we have :

tegra-vi/captur  1721 [002]  4213.672477: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476443
         swapper     0 [000]  4213.672506: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  4213.689117: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.689144: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476444 error=0
 tegra-vi/captur  1721 [002]  4213.689150: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476445
         swapper     0 [000]  4213.689189: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  4213.705801: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.705824: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476446 error=0
 tegra-vi/captur  1721 [002]  4213.705829: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476447
         swapper     0 [000]  4213.705873: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta 12699 [000]  4213.722482: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.722523: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476448 error=0
 tegra-vi/captur  1721 [002]  4213.722532: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476449
 xmlrpcphone-sta 12699 [000]  4213.722556: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta 12699 [000]  4213.739165: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.739212: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476450 error=0
 tegra-vi/captur  1721 [002]  4213.739220: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476451
 xmlrpcphone-sta 12699 [000]  4213.739238: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta 12699 [000]  4213.755849: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.755881: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476452 error=0
 tegra-vi/captur  1721 [002]  4213.755889: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476453
 xmlrpcphone-sta 12699 [000]  4213.755921: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  4213.772532: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1721 [002]  4213.772560: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=476454 error=0
 tegra-vi/captur  1721 [002]  4213.772566: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=476455
         swapper     0 [000]  4213.772605: irq:irq_handler_entry: irq=97 name=host_syncpt

In that case, we get a frame every 16.6ms, as expected.

In another test case, juste before syncpt timeout, we have :

tegra-vi/captur  1705 [000]  2773.970859: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282060 error=0
 tegra-vi/captur  1705 [000]  2773.970872: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282061
 tegra-vi/captur  1705 [000]  2773.970875: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8093 [000]  2773.987483: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2773.987559: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282062 error=0
 tegra-vi/captur  1705 [000]  2773.987567: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282063
 xmlrpcphone-sta  8093 [000]  2774.004241: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.020851: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.020893: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282064 error=0
 tegra-vi/captur  1705 [000]  2774.020903: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282065
         swapper     0 [000]  2774.020930: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.037533: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.037575: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282066 error=0
 tegra-vi/captur  1705 [000]  2774.037584: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282067
         swapper     0 [000]  2774.037616: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.054217: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.054844: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282068 error=0
 tegra-vi/captur  1705 [000]  2774.054857: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282069
         swapper     0 [000]  2774.070974: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.087581: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.087666: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282070 error=0
 tegra-vi/captur  1705 [000]  2774.087673: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282071
 xmlrpcphone-sta  8107 [000]  2774.104339: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8107 [000]  2774.120947: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.120991: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282072 error=0
 tegra-vi/captur  1705 [000]  2774.121000: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282073
 tegra-vi/captur  1705 [000]  2774.121036: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8107 [000]  2774.137667: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.137708: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282074 error=0
 tegra-vi/captur  1705 [000]  2774.137716: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282075
 xmlrpcphone-sta  8107 [000]  2774.154388: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  1703 [000]  2774.170998: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.171054: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282076 error=0
 tegra-vi/captur  1705 [000]  2774.171067: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282077
 tegra-vi/captur  1705 [000]  2774.171071: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.187693: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.187729: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282078 error=0
 tegra-vi/captur  1705 [000]  2774.187738: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282079
         swapper     0 [000]  2774.187785: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.204363: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.204392: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282080 error=0
 tegra-vi/captur  1705 [000]  2774.204398: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282081
         swapper     0 [000]  2774.204437: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8106 [000]  2774.221046: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.221084: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282082 error=0
 tegra-vi/captur  1705 [000]  2774.221091: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282083
 xmlrpcphone-sta  8106 [000]  2774.221120: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8106 [000]  2774.237746: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.237843: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282084 error=0
 tegra-vi/captur  1705 [000]  2774.237858: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282085
 xmlrpcphone-sta  8106 [000]  2774.254486: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8106 [000]  2774.271095: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.271129: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282086 error=0
 tegra-vi/captur  1705 [000]  2774.271137: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282087
 xmlrpcphone-sta  8106 [000]  2774.271169: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000]  2774.287778: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.287805: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282088 error=0
 tegra-vi/captur  1705 [000]  2774.287811: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282089
         swapper     0 [000]  2774.287852: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8107 [000]  2774.304460: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.304494: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282090 error=0
 tegra-vi/captur  1705 [000]  2774.304500: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282091
 tegra-vi/captur  1705 [000]  2774.304534: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8107 [000]  2774.321145: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.321203: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282092 error=0
 tegra-vi/captur  1705 [000]  2774.321219: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.321323: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282093

And then Syncpt timeout happen on the next milliseconds :

tegra-vi/captur  1705 [000]  2774.520711: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282095 error=-11
        kthreadd  8108 [002]  2774.922059: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282097
 xmlrpcphone-sta  8107 [000]  2774.923315: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8107 [000]  2774.955100: irq:irq_handler_entry: irq=97 name=host_syncpt
        kthreadd  8108 [002]  2775.121669: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=282098 error=-11

If you look at the kernel timestamp previous syncpt timeout, you may find strange sequence like :

tegra-vi/captur  1705 [000]  2774.237858: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=282085
 xmlrpcphone-sta  8106 [000]  2774.254486: irq:irq_handler_entry: irq=97 name=host_syncpt
 xmlrpcphone-sta  8106 [000]  2774.271095: irq:irq_handler_entry: irq=97 name=host_syncpt
 tegra-vi/captur  1705 [000]  2774.271129: tegravi:tegravi_capturewait_exit: port=1 idcsi=9

The driver silently miss a frame in that case, juste before syncpt timeout.

Could this issue be linked with the “low highmem value” :

https://devtalk.nvidia.com/default/topic/1015273/jetson-tk1/kernel-21-5-on-tk1-strange-outofmemory-killer-and-low-highmem-value/

cat /proc/buddyinfo 
Node 0, zone   Normal     64     34    156     96     71     14     15      6      3      2     17 
Node 0, zone  HighMem      6      0      0      1      0      0      0      1      0      1      0

Romary
The capture wait is waited for start of frame(SOF), this case should be the capture thread tiggle capture to wait SOF but the during that time it already gone so wait for the next frame’s SOF. Seems the frame rate is 60, could you try lower frame rate like 15 to monitor if it’s still missing any frame?

Unfortunatly, since our video source are PCs machine that doesn’t support a lower rate that 60fps.

My point of view is that a low level part of the Tegra CPU ( IRQ handling, MMU config), is freezing the OS.

Since you are using workqueue (IRQ => SYNCPT WQ => VI WQ) in your driver, any kernel misfunction could lead to capture misfunction.

Romary
I check again with your #10 says “21.5 without our application can hold 1 week of perfect grabbing.” does this test use the yavta? And application for 21.5 have add web-cam support can it remove to verify?

For the status reg print problem can you check can it be accessed before the syncpt timeout.

And you need to close and reopen it if the syncpt timeout happened.

“21.5 without our application can hold 1 week of perfect grabbing” was corresponding to very low CPU usage (just grabing frame and checking pixel image was what we expect ) AND no USB device usage.

removing USB webcam may lead to a very long test case, since we have a 12h long timeframe in nominal operation : it could last a week… not very helpfull.

Our proposal is to speed up by x2 all userland threads and usb device webcam bandwidth, by upgrading input and output from 15fps to 30fps. It won’t have any direct impact on CSI input driver (syncpt/vi kthread), since it is running at 60fps, but it will double USB device IRQ and bandwidth, and increasing CPU and EMC usage.

We made a sysfs interface to test syncpt statusregister, and it always make the system crash.

we failed to upgrade load on userland threads and usb device webcam bandwidth, due to cpu and memory limitation.

We added some perf event tracepoint on the capture workflow :

Vi Capture Done :

_________swapper     0 [000] 41774.549309: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.549314: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.549324: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72522300
  host_syncpt_hp    48 [002] 41774.549327: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72522300
  host_syncpt_hp    48 [002] 41774.549330: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549340: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549343: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549344: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549347: tegravi:tegravi_capturedone_exit: port=1 idcsi=9 syncpt=4987469 error=0
 tegra-vi/captur  5206 [003] 41774.549351: tegravi:tegravi_capturedone_enter: port=1 idcsi=9 syncpt=4987469

Vi Capture Start/Wait :

_________swapper     0 [000] 41774.549371: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.549377: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.549385: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72585133
  host_syncpt_hp    48 [002] 41774.549388: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72585133
  host_syncpt_hp    48 [002] 41774.549391: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549398: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549401: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549403: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549405: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=4987470 error=0
 tegra-vi/captur  5205 [001] 41774.549409: tegravi:tegravi_capturestart_enter: port=1 idcsi=9 syncpt=4987470
 tegra-vi/captur  5205 [001] 41774.549413: tegravi:tegravi_capturestart_exit: port=1 idcsi=9 syncpt=4987471 error=0
 tegra-vi/captur  5205 [001] 41774.549415: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=4987471

Vi Capture Done : 16 ms later (16fps)

_________swapper     0 [000] 41774.565985: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.565990: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.565996: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.89198288
  host_syncpt_hp    48 [002] 41774.565999: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.89198288
  host_syncpt_hp    48 [002] 41774.566001: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566010: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566012: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566013: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566015: tegravi:tegravi_capturedone_exit: port=1 idcsi=9 syncpt=4987471 error=0
 tegra-vi/captur  5206 [003] 41774.566018: tegravi:tegravi_capturedone_enter: port=1 idcsi=9 syncpt=4987471

What are the timing requirement on the following ops ( any datashhet pointer ?) , in milli or micro seconds [ max allowed value ] :

  • Capture start : Between IRQ 97 and those lines :
err = vi2_capture_buffer_setup(cam, buf);
        if (err < 0)
                return err;

        /* Only wait on CSI frame end syncpt if we're using CSI. */
        if (port == TEGRA_CAMERA_PORT_CSI_A) {
                if (!nvhost_syncpt_read_ext_check(cam->ndev,
                                        cam->syncpt_id_csi_a, &val))
                        cam->syncpt_csi_a = nvhost_syncpt_incr_max_ext(
                                                cam->ndev,
                                                cam->syncpt_id_csi_a, 1);

                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_CSI_PPA_FRAME_START | cam->syncpt_id_csi_a);
                TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_SINGLE_SHOT, 0x1);
  • Capture done : Between IRQ 97 and those lines :
if (!nvhost_syncpt_read_ext_check(cam->ndev,
                                                  cam->syncpt_id_csi_a, &val))
                        cam->syncpt_csi_a = nvhost_syncpt_incr_max_ext(
                                                cam->ndev,
                                                cam->syncpt_id_csi_a, 1);

                /*
                 * Make sure recieve VI_MWA_ACK_DONE of the last frame before
                 * stop and dequeue buffer, otherwise MC error will shows up
                 * for the last frame.
                 */
                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_MWA_ACK_DONE | cam->syncpt_id_csi_a);

                /*
                 * Ignore error here and just stop pixel parser after waiting,
                 * even if it's timeout
                 */
                err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
                                cam->syncpt_id_csi_a,
                                cam->syncpt_csi_a,
                                TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
                                NULL,
                                NULL);

we failed to upgrade load on userland threads and usb device webcam bandwidth, due to cpu and memory limitation.

We added some perf event tracepoint on the capture workflow :

Vi Capture Done :

_________swapper     0 [000] 41774.549309: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.549314: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.549324: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72522300
  host_syncpt_hp    48 [002] 41774.549327: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72522300
  host_syncpt_hp    48 [002] 41774.549330: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549340: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549343: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549344: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72522300
 tegra-vi/captur  5206 [003] 41774.549347: tegravi:tegravi_capturedone_exit: port=1 idcsi=9 syncpt=4987469 error=0
 tegra-vi/captur  5206 [003] 41774.549351: tegravi:tegravi_capturedone_enter: port=1 idcsi=9 syncpt=4987469

Vi Capture Start/Wait :

_________swapper     0 [000] 41774.549371: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.549377: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.549385: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72585133
  host_syncpt_hp    48 [002] 41774.549388: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72585133
  host_syncpt_hp    48 [002] 41774.549391: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549398: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549401: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549403: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.72585133
 tegra-vi/captur  5205 [001] 41774.549405: tegravi:tegravi_capturewait_exit: port=1 idcsi=9 syncpt=4987470 error=0
 tegra-vi/captur  5205 [001] 41774.549409: tegravi:tegravi_capturestart_enter: port=1 idcsi=9 syncpt=4987470
 tegra-vi/captur  5205 [001] 41774.549413: tegravi:tegravi_capturestart_exit: port=1 idcsi=9 syncpt=4987471 error=0
 tegra-vi/captur  5205 [001] 41774.549415: tegravi:tegravi_capturewait_enter: port=1 idcsi=9 syncpt=4987471

Vi Capture Done : 16 ms later (16fps)

_________swapper     0 [000] 41774.565985: irq:irq_handler_entry: irq=97 name=host_syncpt
         swapper     0 [000] 41774.565990: irq:irq_handler_exit: irq=97 ret=handled
  host_syncpt_hp    48 [002] 41774.565996: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.89198288
  host_syncpt_hp    48 [002] 41774.565999: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.89198288
  host_syncpt_hp    48 [002] 41774.566001: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566010: tegravi:tegravi_syncpt_process_wait_list_enter: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566012: tegravi:tegravi_syncpt_process_wait_list_before_run_handlers: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566013: tegravi:tegravi_syncpt_process_wait_list_exit: idcsi=9 ts=41774.89198288
 tegra-vi/captur  5206 [003] 41774.566015: tegravi:tegravi_capturedone_exit: port=1 idcsi=9 syncpt=4987471 error=0
 tegra-vi/captur  5206 [003] 41774.566018: tegravi:tegravi_capturedone_enter: port=1 idcsi=9 syncpt=4987471

What are the timing requirement on the following ops ( any datashhet pointer ?) , in milli or micro seconds [ max allowed value ] :

  • Capture start : Between IRQ 97 and those lines :
err = vi2_capture_buffer_setup(cam, buf);
        if (err < 0)
                return err;

        /* Only wait on CSI frame end syncpt if we're using CSI. */
        if (port == TEGRA_CAMERA_PORT_CSI_A) {
                if (!nvhost_syncpt_read_ext_check(cam->ndev,
                                        cam->syncpt_id_csi_a, &val))
                        cam->syncpt_csi_a = nvhost_syncpt_incr_max_ext(
                                                cam->ndev,
                                                cam->syncpt_id_csi_a, 1);

                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_CSI_PPA_FRAME_START | cam->syncpt_id_csi_a);
                TC_VI_REG_WT(cam, TEGRA_VI_CSI_0_SINGLE_SHOT, 0x1);
  • Capture done : Between IRQ 97 and those lines :
if (!nvhost_syncpt_read_ext_check(cam->ndev,
                                                  cam->syncpt_id_csi_a, &val))
                        cam->syncpt_csi_a = nvhost_syncpt_incr_max_ext(
                                                cam->ndev,
                                                cam->syncpt_id_csi_a, 1);

                /*
                 * Make sure recieve VI_MWA_ACK_DONE of the last frame before
                 * stop and dequeue buffer, otherwise MC error will shows up
                 * for the last frame.
                 */
                TC_VI_REG_WT(cam, TEGRA_VI_CFG_VI_INCR_SYNCPT,
                             VI_MWA_ACK_DONE | cam->syncpt_id_csi_a);

                /*
                 * Ignore error here and just stop pixel parser after waiting,
                 * even if it's timeout
                 */
                err = nvhost_syncpt_wait_timeout_ext(cam->ndev,
                                cam->syncpt_id_csi_a,
                                cam->syncpt_csi_a,
                                TEGRA_SYNCPT_CSI_WAIT_TIMEOUT,
                                NULL,
                                NULL);

@Romary
Sorry, I am not really understand your question. Do you mean the time consume from capture start to capture done?

Usually it’s one frame timing like about 33ms if frame rate is 30. And if > 200 will cause timeout.

#define TEGRA_SYNCPT_CSI_WAIT_TIMEOUT             200

Your Capture / VI2 kernel driver has several dependancies due to its architecture:

  • multiple workqueue : IRQ => Syncpt WQ => VI wait+start WQ & VI done WQ
  • kernel scheduling of kthread handling workqueue (syncpt + kworker).
  • kernel memory (slab allocator) since the code do kmalloc/kfree

Any system freeze , that could comes from other IRQ/kthread with higer priority (kmalloc memory fregmentation, cache mismatch on memcpy and so on…), could introduce latency between syncpt IRQ trigger and capture_done() / capture_start() function call, and consequently VI register access.

Those random, impredictable kernel latencies could be fatal to the VI/CSI engine operation, and generate “false negative” syncpt timeout.

We suspect some hardware timer to be much lower than TEGRA_SYNCPT_CSI_WAIT_TIMEOUT 200ms which is a software timeout.

so , what are the maximun values for those kernel internal latency between IRQ and capture_done() / capture_start() call ?. We assume that if those latency are too big, the next syncpt will be missed, and a syncpt timeout will occurs.

We are ask this specific question, because our USB gadget driver generates lots of IRQs (3000/s) at a higher priority than host_syncpt

CPU0       CPU1       CPU2       CPU3       
 29:   26677917    5376057   17085635    4851723       GIC  arch_timer
.....
 <b>52</b>:  215877794          0          0          0       GIC  tegra-otg, tegra-udc
....
 <b>97</b>:    7717801          0          0          0       GIC  host_syncpt
.....

So in our case, we know that some tegra-udcoperation [ from our UVC webcam driver] can take a long time (1k of data has to be copied between buffer for each packet + kmalloc/kfree at begin/stop of each image [fps=15/s]), up to 2-3 milliconde easily.

Could we put the host_syncpt IRQ at a higher priotiy, of the tegra-UDC at a lower IRQ priority ?