VI request timeout leads to a kernel NULL pointer dereference on JetPack 4.4

Steps to reproduce:
jetpack 4.4
No camera connected

Test command:

gst-launch-1.0 -v v4l2src device=/dev/video0 ! xvimagesink

output error log:

Then terminate gst-launch-1.0 with Ctrl+C; a NULL pointer dereference may occur.

Dec  8 02:26:26 EAXVA04 kernel: [17769.053800] tegra194-vi5 15c10000.vi: uncorr_err: request timed out after 2500 ms
Dec  8 02:26:26 EAXVA04 kernel: [17769.053935] tegra194-vi5 15c10000.vi: err_rec: attempting to reset the capture channel
Dec  8 02:26:26 EAXVA04 kernel: [17769.058192] tegra194-vi5 15c10000.vi: err_rec: successfully reset the capture channel
Dec  8 02:26:29 EAXVA04 kernel: [17771.528879] Unable to handle kernel NULL pointer dereference at virtual address 00000000
Dec  8 02:26:29 EAXVA04 kernel: [17771.529067] Mem abort info:
Dec  8 02:26:29 EAXVA04 kernel: [17771.529120]   ESR = 0x96000005
Dec  8 02:26:29 EAXVA04 kernel: [17771.529198]   Exception class = DABT (current EL), IL = 32 bits
Dec  8 02:26:29 EAXVA04 kernel: [17771.529295]   SET = 0, FnV = 0
Dec  8 02:26:29 EAXVA04 kernel: [17771.529347]   EA = 0, S1PTW = 0
Dec  8 02:26:29 EAXVA04 kernel: [17771.529420] Data abort info:
Dec  8 02:26:29 EAXVA04 kernel: [17771.529471]   ISV = 0, ISS = 0x00000005
Dec  8 02:26:29 EAXVA04 kernel: [17771.529534]   CM = 0, WnR = 0
Dec  8 02:26:29 EAXVA04 kernel: [17771.529591] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffc60152d000
Dec  8 02:26:29 EAXVA04 kernel: [17771.529741] [0000000000000000] *pgd=0000000000000000, *pud=0000000000000000
Dec  8 02:26:29 EAXVA04 kernel: [17771.529868] Internal error: Oops: 96000005 [#1] PREEMPT SMP
Dec  8 02:26:29 EAXVA04 kernel: [17771.529973] Modules linked in: mttcan can_dev can_raw can ppsdriver(O) bnep fuse zram overlay binfmt_misc heart_ctl spidev ar0231 nvgpu ip_tables x_tables
Dec  8 02:26:29 EAXVA04 kernel: [17771.530316] CPU: 5 PID: 23472 Comm: cameramanager Tainted: G        W  O    4.9.140-tegra #27
Dec  8 02:26:29 EAXVA04 kernel: [17771.530971] Hardware name: Jetson-AGX (DT)
Dec  8 02:26:29 EAXVA04 kernel: [17771.531293] task: ffffffc6bb641c00 task.stack: ffffffc6c08d0000
Dec  8 02:26:29 EAXVA04 kernel: [17771.531757] PC is at exit_creds+0x2c/0x78
Dec  8 02:26:29 EAXVA04 kernel: [17771.532079] LR is at __put_task_struct+0x4c/0x140
Dec  8 02:26:29 EAXVA04 kernel: [17771.535933] pc : [<ffffff80080deefc>] lr : [<ffffff80080b012c>] pstate: 60400045
Dec  8 02:26:29 EAXVA04 kernel: [17771.543278] sp : ffffffc6c08d3a50
Dec  8 02:26:29 EAXVA04 kernel: [17771.546691] x29: ffffffc6c08d3a50 x28: 0000000000000000 
Dec  8 02:26:29 EAXVA04 kernel: [17771.552294] x27: ffffffc7ba025800 x26: 0000000000000001 
Dec  8 02:26:29 EAXVA04 kernel: [17771.557892] x25: ffffff8009fd09a0 x24: 0000000000000013 
Dec  8 02:26:29 EAXVA04 kernel: [17771.563484] x23: 0000000000000000 x22: ffffffc6c08d3d10 
Dec  8 02:26:29 EAXVA04 kernel: [17771.568218] x21: ffffffc6fd6f0e30 x20: 0000000000000000 
Dec  8 02:26:29 EAXVA04 kernel: [17771.573206] x19: ffffffc6fd6f0e00 x18: 0000007f8bf83a70 
Dec  8 02:26:29 EAXVA04 kernel: [17771.578980] x17: 0000007f8bef9f90 x16: ffffff800813b3a8 
Dec  8 02:26:29 EAXVA04 kernel: [17771.584668] x15: 0000000000000000 x14: 0000000000000002 
Dec  8 02:26:29 EAXVA04 kernel: [17771.590356] x13: 0000000000001e6a x12: 071c71c71c71c71c 
Dec  8 02:26:29 EAXVA04 kernel: [17771.596122] x11: 000000000000000b x10: 0000000000000a20 
Dec  8 02:26:29 EAXVA04 kernel: [17771.601898] x9 : ffffffc6c08d38a0 x8 : ffffffc6bb642680 
Dec  8 02:26:29 EAXVA04 kernel: [17771.607672] x7 : 0000000000000000 x6 : 0000008157050bcc 
Dec  8 02:26:29 EAXVA04 kernel: [17771.613184] x5 : 0000000000000800 
Dec  8 02:26:29 EAXVA04 kernel: [17771.613635] tegra194-vi5 15c10000.vi: no reply from camera processor
Dec  8 02:26:29 EAXVA04 kernel: [17771.613643] tegra194-vi5 15c10000.vi: uncorr_err: request timed out after 2500 ms
Dec  8 02:26:29 EAXVA04 kernel: [17771.613656] tegra194-vi5 15c10000.vi: err_rec: attempting to reset the capture channel
Dec  8 02:26:29 EAXVA04 kernel: [17771.618158] tegra194-vi5 15c10000.vi: err_rec: successfully reset the capture channel
Dec  8 02:26:29 EAXVA04 kernel: [17771.644163] x4 : 0000000000000000 x3 : 00000000000000de x2 : 0000000000000000 
Dec  8 02:26:29 EAXVA04 kernel: [17771.651342] x1 : 0000000000000000 x0 : 00000000ffffffff 
Dec  8 02:26:29 EAXVA04 kernel: [17771.656682] 
Dec  8 02:26:29 EAXVA04 kernel: [17771.658085] Process cameramanager (pid: 23472, stack limit = 0xffffffc6c08d0000)
Dec  8 02:26:29 EAXVA04 kernel: [17771.664991] Call trace:
Dec  8 02:26:29 EAXVA04 kernel: [17771.667620] [<ffffff80080deefc>] exit_creds+0x2c/0x78
Dec  8 02:26:29 EAXVA04 kernel: [17771.672689] [<ffffff80080b012c>] __put_task_struct+0x4c/0x140
Dec  8 02:26:29 EAXVA04 kernel: [17771.677768] [<ffffff80080dc9bc>] kthread_stop+0x1e4/0x1e8
Dec  8 02:26:29 EAXVA04 kernel: [17771.683367] [<ffffff8008b4b978>] vi5_channel_stop_kthreads+0x40/0x58
Dec  8 02:26:29 EAXVA04 kernel: [17771.689494] [<ffffff8008b4ba1c>] vi5_channel_stop_streaming+0x8c/0xa8
Dec  8 02:26:29 EAXVA04 kernel: [17771.695969] [<ffffff8008b3e374>] tegra_channel_stop_streaming+0x34/0x48
Dec  8 02:26:29 EAXVA04 kernel: [17771.702876] [<ffffff8008b36684>] __vb2_queue_cancel+0x34/0x188
Dec  8 02:26:29 EAXVA04 kernel: [17771.708914] [<ffffff8008b36a34>] vb2_core_streamoff+0x54/0xb8
Dec  8 02:26:29 EAXVA04 kernel: [17771.714338] [<ffffff8008b3ae3c>] vb2_streamoff+0x54/0x88
Dec  8 02:26:29 EAXVA04 kernel: [17771.719155] [<ffffff8008b3aec4>] vb2_ioctl_streamoff+0x54/0x60
Dec  8 02:26:29 EAXVA04 kernel: [17771.724845] [<ffffff8008b150dc>] v4l_streamoff+0x3c/0x50
Dec  8 02:26:29 EAXVA04 kernel: [17771.730179] [<ffffff8008b1a114>] __video_do_ioctl+0x204/0x2c8
Dec  8 02:26:29 EAXVA04 kernel: [17771.736037] [<ffffff8008b19ac0>] video_usercopy+0x2a0/0x6a0
Dec  8 02:26:29 EAXVA04 kernel: [17771.741379] [<ffffff8008b19efc>] video_ioctl2+0x3c/0x50
Dec  8 02:26:29 EAXVA04 kernel: [17771.746887] [<ffffff8008b139d8>] v4l2_ioctl+0x88/0x118
Dec  8 02:26:29 EAXVA04 kernel: [17771.751794] [<ffffff8008272e38>] do_vfs_ioctl+0xb0/0x8d8
Dec  8 02:26:29 EAXVA04 kernel: [17771.757388] [<ffffff80082736ec>] SyS_ioctl+0x8c/0xa8
Dec  8 02:26:29 EAXVA04 kernel: [17771.762032] [<ffffff8008083900>] el0_svc_naked+0x34/0x38
Dec  8 02:26:29 EAXVA04 kernel: [17771.767806] ---[ end trace 9126e289eb796667 ]---

Please use v4l2-ctl to confirm that the sensor driver works first.
Also capture the trace log if v4l2-ctl is unable to capture frames.

https://elinux.org/Jetson/l4t/Camera_BringUp

Thanks

If a camera is connected, the v4l2-ctl command can capture images:

v4l2-ctl --set-fmt-video=width=1920,height=1080,pixelformat=UYVY --set-ctrl bypass_mode=0 --stream-mmap --stream-count=100000 --stream-to=/dev/null -d /dev/video0

The issue occurs when testing the error-handling path with no camera connected.

Please try below command.

gst-launch-1.0 nvv4l2camerasrc device=/dev/video0 ! 'video/x-raw(memory:NVMM), format=(string)UYVY, width=(int)1920, height=(int)1080, framerate=(fraction)30/1' ! nvvidconv ! fpsdisplaysink text-overlay=0 video-sink=xvimagesink sync=0 -v

What is the difference between our gst test and this one? The NULL pointer problem still exists.

Please try applying the patch below.

This fix is for JetPack 4.6; the JetPack 4.4 code is not compatible with it.

/*
 * vi_capture_shutdown() - tear down the capture context attached to a VI channel.
 * @chan: VI channel whose capture_data should be released.
 *
 * Returns early (after the debug print) when no capture context is attached.
 * Otherwise, in this order:
 *   1. resets the capture channel if channel_id is not CAPTURE_CHANNEL_INVALID_ID,
 *   2. releases the CSI stream if stream_id is not NVCSI_STREAM_INVALID_ID,
 *   3. if channel_id is (still) not CAPTURE_CHANNEL_INVALID_ID: releases the
 *      capture channel, unpins every request slot when is_mem_pinned is set,
 *      unpins the request buffer and frees unpins_list,
 *   4. frees the context and clears chan->capture_data.
 */
void vi_capture_shutdown(struct tegra_vi_channel *chan)
{
	struct vi_capture *capture = chan->capture_data;
	int i;

	dev_dbg(chan->dev, "%s--\n", __func__);

	if (!capture)
		return;

	if (capture->channel_id != CAPTURE_CHANNEL_INVALID_ID)
		vi_capture_reset(chan, CAPTURE_CHANNEL_RESET_FLAG_IMMEDIATE);

	if (capture->stream_id != NVCSI_STREAM_INVALID_ID)
		csi_stream_release(chan);

	/*
	 * channel_id is deliberately re-read here instead of being cached
	 * above. NOTE(review): the helpers called in between may modify it;
	 * confirm before caching.
	 */
	if (capture->channel_id == CAPTURE_CHANNEL_INVALID_ID)
		goto free_capture;

	vi_capture_release(chan, CAPTURE_CHANNEL_RESET_FLAG_IMMEDIATE);

	if (capture->is_mem_pinned) {
		/* One unpin call per request slot, indices 0 .. queue_depth - 1. */
		for (i = 0; i < capture->queue_depth; i++)
			vi_capture_request_unpin(chan, i);
	}

	capture_common_unpin_memory(&capture->requests);
	kfree(capture->unpins_list);

free_capture:
	kfree(capture);
	chan->capture_data = NULL;
}
/*
 * struct vi_capture - per-channel VI capture context.
 *
 * An instance is reached through tegra_vi_channel::capture_data and is
 * freed by vi_capture_shutdown(). Member notes below state only what the
 * visible shutdown path shows; anything else is marked as an assumption.
 */
struct vi_capture {
	/* Compared against CAPTURE_CHANNEL_INVALID_ID to decide on reset/release. */
	uint16_t channel_id;
	struct device *rtcpu_dev;
	struct tegra_vi_channel *vi_channel;
	/* Passed to capture_common_unpin_memory() during shutdown. */
	struct capture_common_buf requests;
	size_t request_buf_size;
	/* Upper bound of the per-request unpin loop in vi_capture_shutdown(). */
	uint32_t queue_depth;
	uint32_t request_size;
	/* When set, shutdown calls vi_capture_request_unpin() for each slot. */
	bool is_mem_pinned;

	/* Presumably progress-status reporting state -- TODO confirm. */
	struct capture_common_status_notifier progress_status_notifier;
	uint32_t progress_status_buffer_depth;
	bool is_progress_status_notifier_set;

	/* Compared against NVCSI_STREAM_INVALID_ID before csi_stream_release(). */
	uint32_t stream_id;
	uint32_t csi_port;
	uint32_t virtual_channel_id;

	uint32_t num_gos_tables;
	const dma_addr_t *gos_tables;

	/* Presumably progress / embedded-data / line-timer syncpoints -- TODO confirm. */
	struct syncpoint_info progress_sp;
	struct syncpoint_info embdata_sp;
	struct syncpoint_info linetimer_sp;

	/* Presumably control-message request/response synchronization -- TODO confirm. */
	struct completion control_resp;
	struct completion capture_resp;
	struct mutex control_msg_lock;
	struct CAPTURE_CONTROL_MSG control_resp_msg;

	struct mutex reset_lock;
	struct mutex unpins_list_lock;
	/* Released with kfree() in vi_capture_shutdown(). */
	struct capture_common_unpins **unpins_list;

	uint64_t vi_channel_mask;
};

There is no buf_ctx field in struct vi_capture.

Could you upgrade to r32.7.x to verify the problem?

We can only work on r32.4.2; this is the customer's environment and cannot be upgraded.
By the way,
which release package is confirmed to fix this problem?

You can verify on r32.7.x, then try integrating its VI driver into r32.4.2.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.