Jetson TX2 kernel crash on tegra_xudc_gadget_pullup

Hi,

I am using the g_webcam driver on the Jetson TX2 (with the proper USB bulk modifications); however, I noticed that the kernel always crashes if the kernel module is loaded with no USB cable connected to the host PC.

Debugging the crash, it looks like it does not depend on Iso or Bulk mode, but rather on the call to uvc_function_connect, which ends up calling tegra_xudc_gadget_pullup, as shown in the log below:

Jun 24 20:16:46 nvidia-desktop kernel: [  506.800540] g_webcam gadget: uvc_function_bind with bulk support
Jun 24 20:16:46 nvidia-desktop kernel: [  506.800877] g_webcam gadget: Webcam Video Gadget
Jun 24 20:16:46 nvidia-desktop kernel: [  506.800885] g_webcam gadget: g_webcam ready
Jun 24 20:16:46 nvidia-desktop kernel: [  506.800898] tegra-xudc-new 3550000.xudc: exiting ELPG
Jun 24 20:16:46 nvidia-desktop kernel: [  506.807501] tegra-xudc-new 3550000.xudc: exiting ELPG done
Jun 24 20:16:46 nvidia-desktop kernel: [  506.807523] tegra-xudc-new 3550000.xudc: ep 0 (type: 0, dir: out) enabled
Jun 24 20:16:46 nvidia-desktop kernel: [  506.807910] tegra-xudc-new 3550000.xudc: entering ELPG
Jun 24 20:16:46 nvidia-desktop kernel: [  506.809142] BUG: scheduling while atomic: v4l_id/7375/0x00000002
Jun 24 20:16:46 nvidia-desktop kernel: [  506.809568] tegra-xudc-new 3550000.xudc: entering ELPG done
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815186] Modules linked in: g_webcam fuse zram overlay bcmdhd cfg80211 nvgpu ov5693 spidev bluedroid_pm ip_tables x_tables [last unloaded: g_ether]
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815212] CPU: 3 PID: 7375 Comm: v4l_id Not tainted 4.9.140-tegra #7
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815214] Hardware name: quill (DT)
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815216] Call trace:
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815224] [<ffffff800808c038>] dump_backtrace+0x0/0x240
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815228] [<ffffff800808c6a4>] show_stack+0x24/0x30
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815232] [<ffffff800846a1e0>] dump_stack+0x98/0xc0
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815236] [<ffffff80080e70bc>] __schedule_bug+0x64/0x80
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815241] [<ffffff8008f6281c>] __schedule+0x60c/0x780
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815243] [<ffffff8008f629d0>] schedule+0x40/0xa8
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815248] [<ffffff800879a114>] rpm_resume+0xbc/0x710
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815250] [<ffffff800879a7b4>] __pm_runtime_resume+0x4c/0x70
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815255] [<ffffff8008a99a84>] tegra_xudc_gadget_pullup+0x4c/0x160
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815257] [<ffffff8008a95f24>] usb_gadget_connect+0x3c/0x148
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815259] [<ffffff8008a96404>] usb_gadget_activate+0x114/0x120
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815262] [<ffffff8008a8e664>] usb_function_activate+0x54/0xa0
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815266] [<ffffff8008aba1c8>] uvc_function_connect+0x28/0x60
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815268] [<ffffff8008abaff0>] uvc_v4l2_open+0x70/0x90
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815272] [<ffffff8008b15338>] v4l2_open+0x80/0x118
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815276] [<ffffff800826fd04>] chrdev_open+0x94/0x198
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815279] [<ffffff80082667d0>] do_dentry_open+0x1b8/0x318
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815281] [<ffffff8008267d78>] vfs_open+0x58/0x88
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815284] [<ffffff800827b034>] do_last+0x454/0xe60
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815287] [<ffffff800827bad0>] path_openat+0x90/0x378
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815289] [<ffffff800827d040>] do_filp_open+0x70/0xe8
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815291] [<ffffff800826823c>] do_sys_open+0x174/0x258
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815293] [<ffffff80082683a4>] SyS_openat+0x3c/0x50
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815296] [<ffffff800808391c>] __sys_trace_return+0x0/0x4
Jun 24 20:16:46 nvidia-desktop kernel: [  506.815330] tegra-xudc-new 3550000.xudc: exiting ELPG
Jun 24 20:16:46 nvidia-desktop kernel: [  506.817030] tegra-xudc-new 3550000.xudc: exiting ELPG done
Jun 24 20:16:46 nvidia-desktop kernel: [  506.817075] Unable to handle kernel paging request at virtual address 7f89976f50
Jun 24 20:16:46 nvidia-desktop kernel: [  506.824518] tegra-xudc-new 3550000.xudc: entering ELPG
Jun 24 20:16:46 nvidia-desktop kernel: [  506.824536] Mem abort info:
Jun 24 20:16:46 nvidia-desktop kernel: [  506.827338]   ESR = 0x82000007
Jun 24 20:16:46 nvidia-desktop kernel: [  506.830402]   Exception class = IABT (lower EL), IL = 32 bits
Jun 24 20:16:46 nvidia-desktop kernel: [  506.836165]   SET = 0, FnV = 0
Jun 24 20:16:46 nvidia-desktop kernel: [  506.836176] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.836239] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.836303] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.836367] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.837162] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.837816] NOHZ: local_softirq_pending 282
Jun 24 20:16:46 nvidia-desktop kernel: [  506.839149] tegra-xudc-new 3550000.xudc: entering ELPG done
Jun 24 20:16:46 nvidia-desktop kernel: [  506.839157] NOHZ: local_softirq_pending 282

Since there are only a few gadget drivers that try to call this function during the binding stage, I believe this might be a corner case that makes the Tegra driver crash, which in turn causes the system to reboot.

I was wondering if there are any known fixes or ideas, from the tegra_xudc driver point of view, to keep the system from crashing when no cable is connected in this scenario.

The bottom line: independently of the g_webcam driver, it looks like a call to usb_gadget_connect without the USB cable connected causes the tegra_xudc driver to crash.

Best Regards,
Marco

Hi,

Are you using devkit to reproduce this issue? If so, could you share how to reproduce this issue?

Hi,

Sorry for the slow response; I was working on a possible workaround for this issue and only just got a chance to come back to this.

In general, there are very few gadget drivers that would trigger this issue, depending on their implementation; the easiest way is to replicate it with the g_webcam driver. Getting the g_webcam driver fully working requires a lot of changes to add bulk support; however, this is not mandatory to replicate the issue, so I can give you an easy way to do so.

In order to make the g_webcam driver pass the endpoint allocation stage and reach the usb_gadget_connect call, you only need to apply the following patch (debug_uvc.patch.txt (1.2 KB) )

After you have applied the patch and rebuilt your kernel, just copy your new kernel image and modules to your Jetson board. In my case, I am using a Jetson TX2 with JP 4.3; however, this issue should be present on any other Jetson board.

Start your TX2 without the USB cable connected. Then load the g_webcam module as follows:

sudo modprobe g_webcam

Your board will immediately freeze. However, you can get two different traces if you attach the UART and watch /var/log/kern.log simultaneously. The first kernel log points to possible usage of a non-atomic operation within an interrupt context, without much additional information, as shown below:

[   48.847932] BUG: scheduling while atomic: v4l_id/7172/0x00000002
[   48.856234] BUG: scheduling while atomic: v4l_id/7172/0x00000000
[   48.862689] BUG: scheduling while atomic: v4l_id/7172/0x00000002
[   48.870486] Unable to handle kernel paging request at virtual address 7f9a479ad0
[   48.877944] Mem abort info:
[   48.880766]   ESR = 0x82000007
[   48.883854]   Exception class = IABT (lower EL), IL = 32 bits
[   48.889655]   SET = 0, FnV = 0
[   48.892734]   EA = 0, S1PTW = 0
[   48.895942] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffc1e0ff4000
[   48.902531] [0000007f9a479ad0] *pgd=0000000260d88003, *pud=0000000260d88003, *pmd=0000000262976003, *pte=0000000000000000
[   48.913627] Internal error: Oops: 82000007 [#1] PREEMPT SMP
[   48.919208] Modules linked in: g_webcam fuse zram overlay bcmdhd cfg80211 ov5693 spidev nvgpu bluedroid_pm ip_tables x_tables
[   48.930734] CPU: 0 PID: 7172 Comm: v4l_id Tainted: G        W       4.9.140-tegra #32
[   48.938563] Hardware name: quill (DT)
[   48.942235] task: ffffffc1db8f0000 task.stack: ffffffc1e2c88000
[   48.948160] PC is at 0x7f9a479ad0
[   48.951479] LR is at 0x7f9a40bf24
[   48.954800] pc : [<0000007f9a479ad0>] lr : [<0000007f9a40bf24>] pstate: 60000000
[   48.962192] sp : 0000007fd449a930
[   48.965510] x29: 0000007fd449a930 x28: 0000007f9a52e000 
[   48.970859] x27: 0000000000000000 x26: 0000000000000001 
[   48.976206] x25: 0000007f9a5807a0 x24: 0000007f9a52a5a0 
[   48.981551] x23: 0000007f9a529000 x22: 0000000000000000 
[   48.986895] x21: 0000007f9a526bd0 x20: 0000000000000008 
[   48.992239] x19: 0000000000000008 x18: 0000007f9a52aa70 
[   48.997583] x17: 0000007f9a40c2a8 x16: 000000555b949ec0 
[   49.002928] x15: 0000000000000000 x14: 0000000000000014 
[   49.008272] x13: 3a3d53454954494c x12: 4942415041435f4c 
[   49.013617] x11: 0000007fd449a7c8 x10: 0000000000000000 
[   49.018962] x9 : 0000007f9a4459c8 x8 : 0000000000000040 
[   49.024305] x7 : 3a74757074756f5f x6 : 0000007f9a581a60 
[   49.029648] x5 : 0000007fd449a880 x4 : 0000000000000000 
[   49.034992] x3 : 0000000000000000 x2 : cc9edb72fce62700 
[   49.040335] x1 : 0000000000000000 x0 : 0000000000000000 
[   49.045676] 
[   49.047177] Process v4l_id (pid: 7172, stack limit = 0xffffffc1e2c88000)
[   49.053890] ---[ end trace d6825deba88d3eb1 ]---
[   49.074396] Kernel panic - not syncing: Fatal exception in interrupt
[   49.080760] SMP: stopping secondary CPUs
[   49.084694] Kernel Offset: disabled
[   49.088188] Memory Limit: none
[   49.091249] trusty-log panic notifier - trusty version Built: 22:43:54 Dec  9 2019
[   49.114497] ---[ end Kernel panic - not syncing: Fatal exception in interrupt

The second log provides some more information. It shows the actual backtrace:

Jul 14 19:20:10 nvidia-desktop kernel: [   48.838858] g_webcam gadget: uvc_function_bind
Jul 14 19:20:10 nvidia-desktop kernel: [   48.839207] g_webcam gadget: Webcam Video Gadget
Jul 14 19:20:10 nvidia-desktop kernel: [   48.839215] g_webcam gadget: g_webcam ready
Jul 14 19:20:10 nvidia-desktop kernel: [   48.839230] tegra-xudc-new 3550000.xudc: exiting ELPG
Jul 14 19:20:10 nvidia-desktop kernel: [   48.844720] tegra-xudc-new 3550000.xudc: exiting ELPG done
Jul 14 19:20:10 nvidia-desktop kernel: [   48.844738] tegra-xudc-new 3550000.xudc: ep 0 (type: 0, dir: out) enabled
Jul 14 19:20:10 nvidia-desktop kernel: [   48.844767] tegra-xudc-new 3550000.xudc: entering ELPG
Jul 14 19:20:10 nvidia-desktop kernel: [   48.847192] tegra-xudc-new 3550000.xudc: entering ELPG done
Jul 14 19:20:10 nvidia-desktop kernel: [   48.847914] tegra-xudc-new 3550000.xudc: exiting ELPG
Jul 14 19:20:10 nvidia-desktop kernel: [   48.847932] BUG: scheduling while atomic: v4l_id/7172/0x00000002
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854024] Modules linked in: g_webcam fuse zram overlay bcmdhd cfg80211 ov5693 spidev nvgpu bluedroid_pm ip_tables x_tables
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854048] CPU: 0 PID: 7172 Comm: v4l_id Not tainted 4.9.140-tegra #32
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854050] Hardware name: quill (DT)
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854052] Call trace:
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854060] [<ffffff800808c038>] dump_backtrace+0x0/0x240
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854064] [<ffffff800808c6a4>] show_stack+0x24/0x30
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854069] [<ffffff800846a1e0>] dump_stack+0x98/0xc0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854073] [<ffffff80080e70bc>] __schedule_bug+0x64/0x80
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854078] [<ffffff8008f6275c>] __schedule+0x60c/0x780
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854080] [<ffffff8008f62910>] schedule+0x40/0xa8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854083] [<ffffff8008f65960>] schedule_timeout+0x88/0x420
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854086] [<ffffff8008f63560>] wait_for_common+0xa0/0x140
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854089] [<ffffff8008f63664>] wait_for_completion_timeout+0x2c/0x38
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854094] [<ffffff8008be72a4>] bpmp_trywait+0xa4/0x1b0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854096] [<ffffff8008be7504>] tegra_bpmp_send_receive+0x154/0x2b8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854101] [<ffffff80086b6200>] bpmp_send_clk_message+0x40/0xb0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854103] [<ffffff80086b62f0>] clk_bpmp_enable+0x38/0x48
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854107] [<ffffff800869600c>] clk_core_prepare+0x6c/0x1e0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854109] [<ffffff8008698bc4>] clk_prepare+0x2c/0x58
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854113] [<ffffff8008a9cac0>] tegra_xudc_clk_enable.part.15+0x38/0xd8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854116] [<ffffff8008a9cd24>] tegra_xudc_unpowergate+0x15c/0x170
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854118] [<ffffff8008a9cd58>] tegra_xudc_runtime_resume+0x20/0x30
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854122] [<ffffff8008796c84>] pm_generic_runtime_resume+0x3c/0x58
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854125] [<ffffff8008798fcc>] __rpm_callback+0x74/0xa0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854127] [<ffffff800879902c>] rpm_callback+0x34/0x98
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854129] [<ffffff800879a4c8>] rpm_resume+0x470/0x710
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854131] [<ffffff800879a7b4>] __pm_runtime_resume+0x4c/0x70
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854134] [<ffffff8008a99a80>] tegra_xudc_gadget_pullup+0x38/0x100
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854136] [<ffffff8008a95f24>] usb_gadget_connect+0x3c/0x148
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854138] [<ffffff8008a96404>] usb_gadget_activate+0x114/0x120
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854141] [<ffffff8008a8e664>] usb_function_activate+0x54/0xa0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854144] [<ffffff8008aba148>] uvc_function_connect+0x28/0x60
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854147] [<ffffff8008abaf58>] uvc_v4l2_open+0x70/0x90
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854151] [<ffffff8008b15278>] v4l2_open+0x80/0x118
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854155] [<ffffff800826fd04>] chrdev_open+0x94/0x198
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854158] [<ffffff80082667d0>] do_dentry_open+0x1b8/0x318
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854161] [<ffffff8008267d78>] vfs_open+0x58/0x88
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854163] [<ffffff800827b034>] do_last+0x454/0xe60
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854166] [<ffffff800827bad0>] path_openat+0x90/0x378
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854168] [<ffffff800827d040>] do_filp_open+0x70/0xe8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854170] [<ffffff800826823c>] do_sys_open+0x174/0x258
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854172] [<ffffff80082683a4>] SyS_openat+0x3c/0x50
Jul 14 19:20:10 nvidia-desktop kernel: [   48.854174] [<ffffff800808391c>] __sys_trace_return+0x0/0x4
Jul 14 19:20:10 nvidia-desktop kernel: [   48.856213] tegra-xudc-new 3550000.xudc: exiting ELPG done
Jul 14 19:20:10 nvidia-desktop kernel: [   48.856234] BUG: scheduling while atomic: v4l_id/7172/0x00000000
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862236] Modules linked in: g_webcam fuse zram overlay bcmdhd cfg80211 ov5693 spidev nvgpu bluedroid_pm ip_tables x_tables
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862277] CPU: 5 PID: 7172 Comm: v4l_id Tainted: G        W       4.9.140-tegra #32
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862280] Hardware name: quill (DT)
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862284] Call trace:
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862295] [<ffffff800808c038>] dump_backtrace+0x0/0x240
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862303] [<ffffff800808c6a4>] show_stack+0x24/0x30
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862311] [<ffffff800846a1e0>] dump_stack+0x98/0xc0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862318] [<ffffff80080e70bc>] __schedule_bug+0x64/0x80
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862325] [<ffffff8008f6275c>] __schedule+0x60c/0x780
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862331] [<ffffff8008f62910>] schedule+0x40/0xa8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862337] [<ffffff800808b924>] do_notify_resume+0x9c/0xb0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862343] [<ffffff8008083754>] work_pending+0x8/0x10
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862486] tegra-xudc-new 3550000.xudc: entering ELPG
Jul 14 19:20:10 nvidia-desktop kernel: [   48.862689] BUG: scheduling while atomic: v4l_id/7172/0x00000002
Jul 14 19:20:10 nvidia-desktop kernel: [   48.866811] tegra-xudc-new 3550000.xudc: entering ELPG done
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868767] Modules linked in: g_webcam fuse zram overlay bcmdhd cfg80211 ov5693 spidev nvgpu bluedroid_pm ip_tables x_tables
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868797] CPU: 4 PID: 7172 Comm: v4l_id Tainted: G        W       4.9.140-tegra #32
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868799] Hardware name: quill (DT)
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868801] Call trace:
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868810] [<ffffff800808c038>] dump_backtrace+0x0/0x240
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868814] [<ffffff800808c6a4>] show_stack+0x24/0x30
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868818] [<ffffff800846a1e0>] dump_stack+0x98/0xc0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868822] [<ffffff80080e70bc>] __schedule_bug+0x64/0x80
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868825] [<ffffff8008f6275c>] __schedule+0x60c/0x780
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868828] [<ffffff8008f62910>] schedule+0x40/0xa8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868832] [<ffffff800879a114>] rpm_resume+0xbc/0x710
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868834] [<ffffff800879a7b4>] __pm_runtime_resume+0x4c/0x70
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868839] [<ffffff8008a99a80>] tegra_xudc_gadget_pullup+0x38/0x100
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868841] [<ffffff8008a9606c>] usb_gadget_disconnect+0x3c/0x148
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868843] [<ffffff8008a962d8>] usb_gadget_deactivate+0x110/0x128
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868846] [<ffffff8008a8e370>] usb_function_deactivate+0x70/0x80
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868849] [<ffffff8008aba1a8>] uvc_function_disconnect+0x28/0x60
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868852] [<ffffff8008abae8c>] uvc_v4l2_release+0x34/0x90
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868855] [<ffffff8008b151a0>] v4l2_release+0x48/0xa0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868859] [<ffffff800826c5c0>] __fput+0x90/0x1d0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868861] [<ffffff800826c778>] ____fput+0x20/0x30
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868865] [<ffffff80080da4dc>] task_work_run+0xbc/0xd8
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868868] [<ffffff800808b92c>] do_notify_resume+0xa4/0xb0
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868870] [<ffffff8008083754>] work_pending+0x8/0x10
Jul 14 19:20:10 nvidia-desktop kernel: [   48.868942] tegra-xudc-new 3550000.xudc: exiting ELPG

I followed this backtrace a little and made an interesting finding. If you comment out the call to pm_runtime_get_sync in the tegra_xudc driver, the module load survives. Here is the change (for testing purposes only):

Index: kernel/kernel-4.9/drivers/usb/gadget/udc/tegra_xudc.c
===================================================================
--- kernel.orig/kernel-4.9/drivers/usb/gadget/udc/tegra_xudc.c
+++ kernel/kernel-4.9/drivers/usb/gadget/udc/tegra_xudc.c
@@ -2313,7 +2313,7 @@ static int tegra_xudc_gadget_pullup(stru
 	unsigned long flags;
 	u32 val;
 
-	pm_runtime_get_sync(xudc->dev);
+	//pm_runtime_get_sync(xudc->dev);
 	spin_lock_irqsave(&xudc->lock, flags);
 	if (is_on != xudc->pullup) {
 		val = xudc_readl(xudc, CTRL);

However, doing this leaves the USB unable to complete the enumeration after connecting the USB cable, and you might need to reload the module for this. The goal of this change is not to provide a fix but to narrow down the cause of the issue. Everything points to a possible non-atomic call (maybe a usleep_range instead of a delay?) when pm_runtime_get_sync is called with no cable connected.

In general, the USB controller and the driver should survive this call with no cable connection and resume the enumeration process once the cable is connected to the host PC.

I hope this information helps to narrow down the cause of the problem.

Best Regards,
Marco

Hi MarcoMadrigal,

Sorry in advance, and thanks for the analysis. But what I really want to know is:

  1. Why do you need to run g_webcam? What usecase are you using?

  2. What step did you use to enable g_webcam on TX2?

  3. Does this issue happen on nv devkit or your custom carrier board?

  4. Since rel-32.4.3 is released, could you move to rel-32.4.3 to test?

Hi @WayneWWW ,

  1. The g_webcam driver is the one used to add UVC capabilities to any SoC; when enabled, it lets your board act like a webcam and stream video over USB using the UVC specification. You can read more about the UVC specification at: https://usb.org/document-library/video-class-v11-document-set

  2. You need to enable the G_WEBCAM driver as module at:

    -> Device Drivers
    -> USB support (USB_SUPPORT [=y])
    -> USB Gadget Support (USB_GADGET [=y])
    -> USB Gadget Drivers ( [=m])
    -> USB Webcam Gadget ( USB_G_WEBCAM [=m] )

  3. This happens on the dev kits (Jetson Nano and Jetson TX1/TX2); I still need to try it out on Xavier.

  4. I could move to JP 4.4 and test it in a week or so, current development is done in JP 4.3. I will report back on this once I perform the test. I assume your suggestion is because there are new changes in the tegra_xudc driver on the newer Jetpack.

Please note that the issue is independent of g_webcam, but you can replicate it with that driver. The bottom line is that when usb_gadget_connect() is called with no USB cable attached to the host PC, the USB driver seems unable to handle this event correctly and causes the crash.

I will keep looking at this; if I find a potential fix I will share it with you. I just wanted to warn you about this problem in case you can look at it as well.

Best Regards,
Marco

Hi Marco,

Does this issue happen only in iso mode, or in bulk mode as well? AFAIK, our TX2 hardware does not support iso transfer mode as EP.

Hi @WayneWWW,

That is correct, the Jetson family does not support Isochronous endpoints, so any test should be done with bulk support. The patch I provided earlier helps to overcome the endpoint allocation issue for testing purposes.

As mentioned before, the gadget driver itself is functional, as you can see in the following video: https://vimeo.com/304470535 — the only aspect I am looking at right now is this special case where the cable is not connected to the host PC at the time the gadget driver is loaded. Any call to uvc_function_connect(), which leads into the tegra_xudc driver, makes the system crash in this situation.

Best Regards,
Marco

Hi,

Currently, we have not received any report of the kernel crashing due to “scheduling while atomic” in the tegra_xudc_gadget_pullup function.

Also, the g_webcam driver and isoch EPs in device mode are not verified and not supported for now.

Hi,

Well, there is at least one report now :)

Please note this has nothing to do with the g_webcam driver or isochronous endpoints (actually I am using bulk endpoints).

The main issue is that any call to usb_gadget_connect(), which is a standard call from a gadget driver, causes the system to crash when using the tegra_xudc driver if no USB cable is connected. Normally, the specific USB controller driver should react properly to this call, allowing the device to finish enumeration when reconnected.

If you follow the steps provided, it should be fairly easy for you to replicate this problem.

Best Regards,
Marco

Hi,

We will check this internally.

Hi,

For this issue, please try the command below before your test and see if the issue is resolved.

echo on > /sys/devices/3550000.xudc/power/control

Also, please add the patch below to prevent the error for this issue.

diff --git a/drivers/usb/gadget/udc/tegra_xudc.c b/drivers/usb/gadget/udc/tegra_xudc.c
index d2be00a..b52aa6e4 100644
--- a/drivers/usb/gadget/udc/tegra_xudc.c
+++ b/drivers/usb/gadget/udc/tegra_xudc.c
@@ -596,6 +596,8 @@
 
 	struct delayed_work port_reset_war_work;
 	bool wait_for_sec_prc;
+
+	struct work_struct pullup_work;
 };
 
 #define XUDC_TRB_MAX_BUFFER_SIZE 65536
@@ -2312,30 +2314,42 @@
 	return ret;
 }
 
-static int tegra_xudc_gadget_pullup(struct usb_gadget *gadget, int is_on)
+static void tegra_xudc_gadget_pullup_work(struct work_struct *work)
 {
-	struct tegra_xudc *xudc = to_xudc(gadget);
+	struct tegra_xudc *xudc = container_of(work, struct tegra_xudc,
+						pullup_work);
 	unsigned long flags;
 	u32 val;
 
 	pm_runtime_get_sync(xudc->dev);
 	spin_lock_irqsave(&xudc->lock, flags);
-	if (is_on != xudc->pullup) {
-		val = xudc_readl(xudc, CTRL);
-		if (is_on)
-			val |= CTRL_ENABLE;
-		else
-			val &= ~CTRL_ENABLE;
-		xudc_writel(xudc, val, CTRL);
-	}
-	xudc->pullup = is_on;
+	val = xudc_readl(xudc, CTRL);
+	if (xudc->pullup)
+		val |= CTRL_ENABLE;
+	else
+		val &= ~CTRL_ENABLE;
+	xudc_writel(xudc, val, CTRL);
 	if (xudc->ucd && xudc->device_mode &&
-	    xudc->connect_type == EXTCON_USB && is_on)
+	    xudc->connect_type == EXTCON_USB && xudc->pullup)
 		schedule_delayed_work(&xudc->non_std_charger_work,
 			msecs_to_jiffies(NON_STD_CHARGER_DET_TIME_MS));
 	spin_unlock_irqrestore(&xudc->lock, flags);
 	pm_runtime_put(xudc->dev);
 
+}
+
+static int tegra_xudc_gadget_pullup(struct usb_gadget *gadget, int is_on)
+{
+	struct tegra_xudc *xudc = to_xudc(gadget);
+	unsigned long flags;
+
+	spin_lock_irqsave(&xudc->lock, flags);
+	if (is_on != xudc->pullup) {
+		xudc->pullup = is_on;
+		schedule_work(&xudc->pullup_work);
+	}
+	spin_unlock_irqrestore(&xudc->lock, flags);
+
 	return 0;
 }
 
@@ -2353,6 +2367,7 @@
 	dev_dbg(xudc->dev, "%s\n", __func__);
 
 	pm_runtime_get_sync(xudc->dev);
+	cancel_work_sync(&xudc->pullup_work);
 	spin_lock_irqsave(&xudc->lock, flags);
 	if (xudc->driver) {
 		ret = -EBUSY;
@@ -2396,6 +2411,7 @@
 	u32 val;
 
 	pm_runtime_get_sync(xudc->dev);
+	cancel_work_sync(&xudc->pullup_work);
 	spin_lock_irqsave(&xudc->lock, flags);
 	val = xudc_readl(xudc, CTRL);
 	val &= ~(CTRL_IE | CTRL_ENABLE);
@@ -4332,6 +4348,7 @@
 	INIT_WORK(&xudc->boost_emc, tegra_xudc_boost_emc_work);
 	INIT_DELAYED_WORK(&xudc->port_reset_war_work,
 				tegra_xudc_port_reset_war_work);
+	INIT_WORK(&xudc->pullup_work, tegra_xudc_gadget_pullup_work);
 
 	tegra_pd_add_device(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
@@ -4380,6 +4397,7 @@
 	if (xudc->ucd) {
 		cancel_work_sync(&xudc->set_charging_current_work);
 		cancel_delayed_work_sync(&xudc->non_std_charger_work);
+		cancel_work_sync(&xudc->pullup_work);
 		tegra_usb_release_ucd(xudc->ucd);
 	}
 
@@ -4445,6 +4463,7 @@
 		return 0;
 
 	dev_info(xudc->dev, "entering ELPG\n");
+	cancel_work_sync(&xudc->pullup_work);
 	spin_lock_irqsave(&xudc->lock, flags);
 	xudc->powergated = true;
 	xudc->saved_regs.ctrl = xudc_readl(xudc, CTRL);

Hi Wayne,

Sorry for the late reply. Thanks for the patch; I will try it out and get back to you.

Best Regards,
-Marco