I’m hitting a NULL pointer dereference in the ‘nvme-tcp’ driver with both MLNX OFED 23.10 and 23.07 when they are built from source: PANIC: “BUG: unable to handle kernel NULL pointer dereference at 0000000000000074”

I am able to reproduce the issue every time on my system when I run “nvme connect -t tcp -s 4420 …” to connect to a remote NVMeoF TCP target.

crash> bt
PID: 3127389  TASK: ffffa0b63d0c0000  CPU: 22  COMMAND: "nvme"
 #0 [ffffb7c38d66fad0] machine_kexec at ffffffff9fe6c243
 #1 [ffffb7c38d66fb28] __crash_kexec at ffffffff9ffb58fa
 #2 [ffffb7c38d66fbe8] crash_kexec at ffffffff9ffb6831
 #3 [ffffb7c38d66fc00] oops_end at ffffffff9fe2a9c1
 #4 [ffffb7c38d66fc20] no_context at ffffffff9fe7e913
 #5 [ffffb7c38d66fc78] __bad_area_nosemaphore at ffffffff9fe7ec74
 #6 [ffffb7c38d66fcc0] do_page_fault at ffffffff9fe7f8b7
 #7 [ffffb7c38d66fcf0] page_fault at ffffffffa0a0116e
    [exception RIP: nvme_tcp_map_queues+32]
    RIP: ffffffffc02a6b40  RSP: ffffb7c38d66fda0  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: ffffa0ae44bd6008  RCX: 0000000000000020
    RDX: ffffffffc02ab1a0  RSI: 0000000000000020  RDI: ffffa0ae44bd6008
    RBP: ffffa0ae44bd6338   R8: ffffb7c38d66fd70   R9: ffffa0af82f82000
    R10: ffffa0af82f82000  R11: 0000000000000001  R12: 0000000000000000
    R13: 0000000000000001  R14: 0000000000000021  R15: 0000000000000021
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #8 [ffffb7c38d66fdb8] blk_mq_alloc_tag_set at ffffffffa02aa79e
 #9 [ffffb7c38d66fde0] nvme_alloc_io_tag_set at ffffffffc0bcddf4 [nvme_core]
#10 [ffffb7c38d66fe00] nvme_tcp_setup_ctrl.cold.57 at ffffffffc02aa4f1 [nvme_tcp]
#11 [ffffb7c38d66fe50] nvme_tcp_create_ctrl at ffffffffc02a99fd [nvme_tcp]
#12 [ffffb7c38d66fe88] nvmf_dev_write at ffffffffc02a058f [nvme_fabrics]
#13 [ffffb7c38d66fed0] vfs_write at ffffffffa0165245
#14 [ffffb7c38d66ff00] ksys_write at ffffffffa01654cf
#15 [ffffb7c38d66ff38] do_syscall_64 at ffffffff9fe052fb
#16 [ffffb7c38d66ff50] entry_SYSCALL_64_after_hwframe at ffffffffa0a000a9
    RIP: 00007f1538f86d38  RSP: 00007ffe5a305748  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: 0000000000000003  RCX: 00007f1538f86d38
    RDX: 00000000000000ff  RSI: 00007ffe5a306d00  RDI: 0000000000000003
    RBP: 00007ffe5a306d00   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000246  R12: 00000000000000ff
    R13: 0000000000000009  R14: 00007ffe5a3080a0  R15: 000055db338b84c0
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
crash> dis -rl ffffffffc02a6b40
/root/MLNX_OFED_SRC-23.10- 2620
0xffffffffc02a6b20 <nvme_tcp_map_queues>:       nopl   0x0(%rax,%rax,1) [FTRACE NOP]
/root/MLNX_OFED_SRC-23.10- 2622
0xffffffffc02a6b25 <nvme_tcp_map_queues+5>:     push   %r12
0xffffffffc02a6b27 <nvme_tcp_map_queues+7>:     push   %rbp
0xffffffffc02a6b28 <nvme_tcp_map_queues+8>:     push   %rbx
0xffffffffc02a6b29 <nvme_tcp_map_queues+9>:     mov    0x90(%rdi),%rbp
/root/MLNX_OFED_SRC-23.10- 2623
0xffffffffc02a6b30 <nvme_tcp_map_queues+16>:    mov    %rdi,%rbx
0xffffffffc02a6b33 <nvme_tcp_map_queues+19>:    mov    0x1128(%rbp),%r12
/root/MLNX_OFED_SRC-23.10- 2625
0xffffffffc02a6b3a <nvme_tcp_map_queues+26>:    mov    0x1300(%rbp),%eax
0xffffffffc02a6b40 <nvme_tcp_map_queues+32>:    mov    0x74(%r12),%ecx
(gdb) list *(nvme_tcp_map_queues+32)
0xb70 is in nvme_tcp_map_queues (/root/MLNX_OFED_SRC-23.10-
2620	{
2622		struct nvme_tcp_ctrl *ctrl = set->driver_data;
2623		struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2625		if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
2626			/* separate read/write queues */
2627			set->map[HCTX_TYPE_DEFAULT].nr_queues =
2628				ctrl->io_queues[HCTX_TYPE_DEFAULT];
2629			set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;

I’m using a custom kernel with Rocky 8.7 and therefore must build MLNX OFED from source; when doing so, the backport patches are applied, and there is an offending chunk in ‘0252-BACKPORT-drivers-nvme-host-tcp.c.patch’:

@@ -2446,9 +2612,14 @@ static blk_status_t nvme_tcp_queue_rq(st
        return BLK_STS_OK;

+static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
-       struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
+       struct nvme_tcp_ctrl *ctrl = set->driver_data;
        struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;

        if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {

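Specifically, ‘set->driver_data’ contains a ‘struct nvme_ctrl *’, but once this patch is applied it is treated as a ‘struct nvme_tcp_ctrl *’, which is not correct given how the value is set. As a result ‘ctrl’ points at the wrong structure and ‘opts’ (‘ctrl->ctrl.opts’) is read from the wrong offset, yielding NULL. This leads to the NULL pointer dereference when ‘opts’ is accessed: in the dump above, R12 is 0 and the faulting instruction loads from 0x74(%r12), which matches the reported fault address of 0x74.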

My current work-around is to apply the following patch to the extracted “mlnx-ofa_kernel-23.07” sources before building (both 23.10 and 23.07 are affected by this, but I’m actively using 23.07):

--- a/backports/0252-BACKPORT-drivers-nvme-host-tcp.c.patch	2023-08-24 10:05:13.000000000 -0500
+++ b/backports/0252-BACKPORT-drivers-nvme-host-tcp.c.patch	2024-01-08 08:14:31.773443205 -0600
@@ -340,7 +340,7 @@
  static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
  		const struct blk_mq_queue_data *bd)
-@@ -2445,9 +2559,14 @@ static blk_status_t nvme_tcp_queue_rq(st
+@@ -2445,8 +2559,13 @@ static blk_status_t nvme_tcp_queue_rq(st
  	return BLK_STS_OK;
@@ -350,12 +350,10 @@
  static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
--	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
-+	struct nvme_tcp_ctrl *ctrl = set->driver_data;
+ 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
  	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
- 	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 @@ -2486,9 +2605,25 @@ static void nvme_tcp_map_queues(struct b

To build, I extract “mlnx-ofa_kernel-23.10.tgz” from “MLNX_OFED_SRC-23.10-” and run the following:

./configure --with-nvmf_host-mod

Is this issue already known internally at NVIDIA? If so, is there an upcoming MOFED release planned that resolves it? I searched this forum and other places but wasn’t able to find anyone else discussing this issue.



So far, the nvme driver is only supported on the tested kernels listed in the Release Notes (RN).

