MLNX OFED 23.10 NULL Pointer Dereference (nvme-tcp.ko)

Hi,

I’m hitting a NULL pointer dereference in the ‘nvme-tcp’ driver in both MLNX OFED 23.10 and 23.07 when built from source. The panic is: “BUG: unable to handle kernel NULL pointer dereference at 0000000000000074”

I can reproduce the issue every time on my system by running “nvme connect -t tcp -s 4420 …” to connect to a remote NVMe-oF TCP target.

crash> bt
PID: 3127389  TASK: ffffa0b63d0c0000  CPU: 22  COMMAND: "nvme"
 #0 [ffffb7c38d66fad0] machine_kexec at ffffffff9fe6c243
 #1 [ffffb7c38d66fb28] __crash_kexec at ffffffff9ffb58fa
 #2 [ffffb7c38d66fbe8] crash_kexec at ffffffff9ffb6831
 #3 [ffffb7c38d66fc00] oops_end at ffffffff9fe2a9c1
 #4 [ffffb7c38d66fc20] no_context at ffffffff9fe7e913
 #5 [ffffb7c38d66fc78] __bad_area_nosemaphore at ffffffff9fe7ec74
 #6 [ffffb7c38d66fcc0] do_page_fault at ffffffff9fe7f8b7
 #7 [ffffb7c38d66fcf0] page_fault at ffffffffa0a0116e
    [exception RIP: nvme_tcp_map_queues+32]
    RIP: ffffffffc02a6b40  RSP: ffffb7c38d66fda0  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: ffffa0ae44bd6008  RCX: 0000000000000020
    RDX: ffffffffc02ab1a0  RSI: 0000000000000020  RDI: ffffa0ae44bd6008
    RBP: ffffa0ae44bd6338   R8: ffffb7c38d66fd70   R9: ffffa0af82f82000
    R10: ffffa0af82f82000  R11: 0000000000000001  R12: 0000000000000000
    R13: 0000000000000001  R14: 0000000000000021  R15: 0000000000000021
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #8 [ffffb7c38d66fdb8] blk_mq_alloc_tag_set at ffffffffa02aa79e
 #9 [ffffb7c38d66fde0] nvme_alloc_io_tag_set at ffffffffc0bcddf4 [nvme_core]
#10 [ffffb7c38d66fe00] nvme_tcp_setup_ctrl.cold.57 at ffffffffc02aa4f1 [nvme_tcp]
#11 [ffffb7c38d66fe50] nvme_tcp_create_ctrl at ffffffffc02a99fd [nvme_tcp]
#12 [ffffb7c38d66fe88] nvmf_dev_write at ffffffffc02a058f [nvme_fabrics]
#13 [ffffb7c38d66fed0] vfs_write at ffffffffa0165245
#14 [ffffb7c38d66ff00] ksys_write at ffffffffa01654cf
#15 [ffffb7c38d66ff38] do_syscall_64 at ffffffff9fe052fb
#16 [ffffb7c38d66ff50] entry_SYSCALL_64_after_hwframe at ffffffffa0a000a9
    RIP: 00007f1538f86d38  RSP: 00007ffe5a305748  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: 0000000000000003  RCX: 00007f1538f86d38
    RDX: 00000000000000ff  RSI: 00007ffe5a306d00  RDI: 0000000000000003
    RBP: 00007ffe5a306d00   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000246  R12: 00000000000000ff
    R13: 0000000000000009  R14: 00007ffe5a3080a0  R15: 000055db338b84c0
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
crash>
crash> dis -rl ffffffffc02a6b40
/root/MLNX_OFED_SRC-23.10-1.1.9.0/SRPMS/mlnx-ofa_kernel-23.10/drivers/nvme/host/tcp.c: 2620
0xffffffffc02a6b20 <nvme_tcp_map_queues>:       nopl   0x0(%rax,%rax,1) [FTRACE NOP]
/root/MLNX_OFED_SRC-23.10-1.1.9.0/SRPMS/mlnx-ofa_kernel-23.10/drivers/nvme/host/tcp.c: 2622
0xffffffffc02a6b25 <nvme_tcp_map_queues+5>:     push   %r12
0xffffffffc02a6b27 <nvme_tcp_map_queues+7>:     push   %rbp
0xffffffffc02a6b28 <nvme_tcp_map_queues+8>:     push   %rbx
0xffffffffc02a6b29 <nvme_tcp_map_queues+9>:     mov    0x90(%rdi),%rbp
/root/MLNX_OFED_SRC-23.10-1.1.9.0/SRPMS/mlnx-ofa_kernel-23.10/drivers/nvme/host/tcp.c: 2623
0xffffffffc02a6b30 <nvme_tcp_map_queues+16>:    mov    %rdi,%rbx
0xffffffffc02a6b33 <nvme_tcp_map_queues+19>:    mov    0x1128(%rbp),%r12
/root/MLNX_OFED_SRC-23.10-1.1.9.0/SRPMS/mlnx-ofa_kernel-23.10/drivers/nvme/host/tcp.c: 2625
0xffffffffc02a6b3a <nvme_tcp_map_queues+26>:    mov    0x1300(%rbp),%eax
0xffffffffc02a6b40 <nvme_tcp_map_queues+32>:    mov    0x74(%r12),%ecx
crash>
(gdb) list *(nvme_tcp_map_queues+32)
0xb70 is in nvme_tcp_map_queues (/root/MLNX_OFED_SRC-23.10-1.1.9.0/SRPMS/mlnx-ofa_kernel-23.10/drivers/nvme/host/tcp.c:2625).
2620	{
2621	#ifdef HAVE_BLK_MQ_HCTX_TYPE
2622		struct nvme_tcp_ctrl *ctrl = set->driver_data;
2623		struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2624	
2625		if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
2626			/* separate read/write queues */
2627			set->map[HCTX_TYPE_DEFAULT].nr_queues =
2628				ctrl->io_queues[HCTX_TYPE_DEFAULT];
2629			set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
(gdb) 

I’m running a custom kernel on Rocky 8.7 and therefore must build MLNX OFED from source. During the build, the backport patches are applied, and there is an offending chunk in ‘0252-BACKPORT-drivers-nvme-host-tcp.c.patch’:

@@ -2446,9 +2612,14 @@ static blk_status_t nvme_tcp_queue_rq(st
        return BLK_STS_OK;
 }

+#ifdef HAVE_BLK_MQ_OPS_MAP_QUEUES_RETURN_INT
+static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
+#else
 static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
+#endif
 {
-       struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
+#ifdef HAVE_BLK_MQ_HCTX_TYPE
+       struct nvme_tcp_ctrl *ctrl = set->driver_data;
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;

        if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {

Specifically, ‘set->driver_data’ holds a ‘struct nvme_ctrl *’ (stored by ‘nvme_alloc_io_tag_set()’, visible in the backtrace above), but once this patch is applied it is treated directly as a ‘struct nvme_tcp_ctrl *’ instead of being converted with ‘to_tcp_ctrl()’. Reading ‘opts’ (‘ctrl->ctrl.opts’) through the misinterpreted pointer then yields NULL, which matches the faulting instruction above: ‘mov 0x74(%r12),%ecx’ with R12 == 0, i.e. the fault address 0x0000000000000074.
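
To illustrate the mis-cast outside the kernel, here is a minimal user-space sketch. The struct layouts and field names below are simplified stand-ins, not the real definitions from drivers/nvme/host/, and container_of is spelled out locally:

#include <stddef.h>
#include <stdio.h>

/* Local stand-in for the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Simplified stand-ins for the real structures. */
struct nvmf_ctrl_options { unsigned int nr_write_queues; };

struct nvme_ctrl {
        struct nvmf_ctrl_options *opts;
};

struct nvme_tcp_ctrl {
        long io_queues[8];       /* leading members push 'ctrl' off offset 0 */
        struct nvme_ctrl ctrl;   /* embedded controller */
};

/* What the upstream code does: recover the container from the member. */
static struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
        return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
}

int main(void)
{
        static struct nvme_tcp_ctrl tcp_ctrl;
        /* nvme_alloc_io_tag_set() stores the embedded nvme_ctrl pointer. */
        void *driver_data = &tcp_ctrl.ctrl;

        /* Correct: step back to the containing nvme_tcp_ctrl. */
        struct nvme_tcp_ctrl *good = to_tcp_ctrl(driver_data);

        /* Broken (what the backported chunk does): reinterpret the
         * nvme_ctrl pointer as a nvme_tcp_ctrl pointer, so 'ctrl.opts'
         * would be read from the wrong offset inside the object. */
        struct nvme_tcp_ctrl *bad = driver_data;

        printf("good reads opts at %p, bad would read opts at %p\n",
               (void *)&good->ctrl.opts, (void *)&bad->ctrl.opts);
        return 0;
}

In the kernel, the bad read lands on memory that happens to hold NULL, and the subsequent ‘opts->…’ access faults at offset 0x74.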

My current work-around is to apply the following patch to the extracted “mlnx-ofa_kernel-23.07” sources before building (both 23.10 and 23.07 are affected, but I’m actively using 23.07):

--- a/backports/0252-BACKPORT-drivers-nvme-host-tcp.c.patch	2023-08-24 10:05:13.000000000 -0500
+++ b/backports/0252-BACKPORT-drivers-nvme-host-tcp.c.patch	2024-01-08 08:14:31.773443205 -0600
@@ -340,7 +340,7 @@
  
  static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
  		const struct blk_mq_queue_data *bd)
-@@ -2445,9 +2559,14 @@ static blk_status_t nvme_tcp_queue_rq(st
+@@ -2445,8 +2559,13 @@ static blk_status_t nvme_tcp_queue_rq(st
  	return BLK_STS_OK;
  }
  
@@ -350,12 +350,10 @@
  static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 +#endif
  {
--	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
 +#ifdef HAVE_BLK_MQ_HCTX_TYPE
-+	struct nvme_tcp_ctrl *ctrl = set->driver_data;
+ 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
  	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
  
- 	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 @@ -2486,9 +2605,25 @@ static void nvme_tcp_map_queues(struct b
  		ctrl->io_queues[HCTX_TYPE_DEFAULT],
  		ctrl->io_queues[HCTX_TYPE_READ],

To build, I extract “mlnx-ofa_kernel-23.10.tgz” from “MLNX_OFED_SRC-23.10-1.1.9.0” and run the following:

./configure --with-nvmf_host-mod
make

Is this issue already known internally at NVIDIA, and if so, is there an upcoming MOFED release planned to resolve it? I searched this forum and elsewhere but wasn’t able to find anyone else discussing it.

Thanks,

Marc

So far, the nvme driver is only supported on the tested kernels listed in the Release Notes:

https://docs.nvidia.com/networking/display/mlnxofedv23100550/general+support
