Non-zero status: 22 ibv_modify_qp failed when running NVSHMEM example on more than two GPUs

I’m trying to run the example program from the NVSHMEM documentation:

#include <stdio.h>
#include <cuda.h>
#include <nvshmem.h>
#include <nvshmemx.h>
#include <mpi/mpi.h>

// Each PE writes its own ID into the next PE's copy of `destination` (ring shift).
__global__ void simple_shift(int *destination) {
    int mype = nvshmem_my_pe();
    int npes = nvshmem_n_pes();
    int peer = (mype + 1) % npes;

    nvshmem_int_p(destination, mype, peer);
}

int main(int argc, char *argv[]) {
    int msg, rank, ndevices;
    cudaStream_t stream;

    nvshmemx_init_attr_t attr;
    MPI_Comm comm = MPI_COMM_WORLD;
    attr.mpi_comm = &comm;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // One GPU per MPI rank, then bootstrap NVSHMEM on top of the MPI communicator.
    cudaGetDeviceCount(&ndevices);
    cudaSetDevice(rank % ndevices);
    nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);

    cudaStreamCreate(&stream);

    // Symmetric allocation: the returned address is valid on every PE.
    int *destination = (int *) nvshmem_malloc(sizeof(int));

    simple_shift<<<1, 1, 0, stream>>>(destination);
    nvshmemx_barrier_all_on_stream(stream);
    cudaMemcpyAsync(&msg, destination, sizeof(int), cudaMemcpyDeviceToHost, stream);

    cudaStreamSynchronize(stream);
    printf("%d: received message %d\n", nvshmem_my_pe(), msg);

    nvshmem_free(destination);
    nvshmem_finalize();
    MPI_Finalize();
    return 0;
}

It works fine on 2 GPUs, but as soon as I run it on more than 2 I get a bunch of errors:

> nvcc -rdc=true -ccbin g++ -arch=sm_90 -I /usr/include/nvshmem_12 -I /usr/include/x86_64-linux-gnu/openmpi  example.cu -o example.out -L /usr/lib/x86_64-linux-gnu/nvshmem/12 -lmpi -lnvshmem -lnvidia-ml -lcuda -lcudart

> mpirun --allow-run-as-root  -n 2 example.out
0: received message 1
1: received message 0

> mpirun --allow-run-as-root  -n 3 example.out
/dvs/p4/build/sw/rel/gpgpu/toolkit/r12.8/main_nvshmem/src/modules/transport/ibrc/ibrc.cpp:420: non-zero status: 22 ibv_modify_qp failed

/dvs/p4/build/sw/rel/gpgpu/toolkit/r12.8/main_nvshmem/src/modules/transport/ibrc/ibrc.cpp:1511: non-zero status: 7 ep_connect failed

/dvs/p4/build/sw/rel/gpgpu/toolkit/r12.8/main_nvshmem/src/modules/transport/ibrc/ibrc.cpp:1580: non-zero status: 7 transport create connect failed

/dvs/p4/build/sw/rel/gpgpu/toolkit/r12.8/main_nvshmem/src/host/transport/transport.cpp:420: non-zero status: 7 connect EPS failed

/dvs/p4/build/sw/rel/gpgpu/toolkit/r12.8/main_nvshmem/src/host/init/init.cu:1045: non-zero status: 7 nvshmem setup connections failed

What could be the cause of this?

Here’s my topology:

        GPU0    GPU1    GPU2    GPU3    GPU4    GPU5    GPU6    GPU7    NIC0    NIC1    NIC2    NIC3    NIC4    NIC5    CPU Affinity    NUMA Affinity   GPU NUMA ID
GPU0     X      NV18    NV18    NV18    NV18    NV18    NV18    NV18    PIX     NODE    NODE    NODE    SYS     SYS     0-31,64-95      0               N/A
GPU1    NV18     X      NV18    NV18    NV18    NV18    NV18    NV18    NODE    NODE    NODE    NODE    SYS     SYS     0-31,64-95      0               N/A
GPU2    NV18    NV18     X      NV18    NV18    NV18    NV18    NV18    NODE    PIX     NODE    NODE    SYS     SYS     0-31,64-95      0               N/A
GPU3    NV18    NV18    NV18     X      NV18    NV18    NV18    NV18    NODE    NODE    NODE    NODE    SYS     SYS     0-31,64-95      0               N/A
GPU4    NV18    NV18    NV18    NV18     X      NV18    NV18    NV18    SYS     SYS     SYS     SYS     PIX     NODE    32-63,96-127    1               N/A
GPU5    NV18    NV18    NV18    NV18    NV18     X      NV18    NV18    SYS     SYS     SYS     SYS     NODE    NODE    32-63,96-127    1               N/A
GPU6    NV18    NV18    NV18    NV18    NV18    NV18     X      NV18    SYS     SYS     SYS     SYS     NODE    PIX     32-63,96-127    1               N/A
GPU7    NV18    NV18    NV18    NV18    NV18    NV18    NV18     X      SYS     SYS     SYS     SYS     NODE    NODE    32-63,96-127    1               N/A
NIC0    PIX     NODE    NODE    NODE    SYS     SYS     SYS     SYS      X      NODE    NODE    NODE    SYS     SYS
NIC1    NODE    NODE    PIX     NODE    SYS     SYS     SYS     SYS     NODE     X      NODE    NODE    SYS     SYS
NIC2    NODE    NODE    NODE    NODE    SYS     SYS     SYS     SYS     NODE    NODE     X      PIX     SYS     SYS
NIC3    NODE    NODE    NODE    NODE    SYS     SYS     SYS     SYS     NODE    NODE    PIX      X      SYS     SYS
NIC4    SYS     SYS     SYS     SYS     PIX     NODE    NODE    NODE    SYS     SYS     SYS     SYS      X      NODE
NIC5    SYS     SYS     SYS     SYS     NODE    NODE    PIX     NODE    SYS     SYS     SYS     SYS     NODE     X

Legend:

X = Self
SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI)
NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node
PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU)
PXB = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge)
PIX = Connection traversing at most a single PCIe bridge
NV# = Connection traversing a bonded set of # NVLinks

NIC Legend:

NIC0: mlx5_0
NIC1: mlx5_1
NIC2: mlx5_2
NIC3: mlx5_3
NIC4: mlx5_4
NIC5: mlx5_5

And here’s my ibv_devinfo output:

hca_id: mlx5_0
    transport:          InfiniBand (0)
    fw_ver:             28.41.1000
    node_guid:          9c63:c003:0055:48fe
    sys_image_guid:     9c63:c003:0055:48fe
    vendor_id:          0x02c9
    vendor_part_id:     4129
    hw_ver:             0x0
    board_id:           MT_0000000838
    phys_port_cnt:      1
        port:   1
            state:          PORT_ACTIVE (4)
            max_mtu:        4096 (5)
            active_mtu:     4096 (5)
            sm_lid:         1167
            port_lid:       975
            port_lmc:       0x00
            link_layer:     InfiniBand

hca_id: mlx5_1
    transport:          InfiniBand (0)
    fw_ver:             28.41.1000
    node_guid:          9c63:c003:0055:48b6
    sys_image_guid:     9c63:c003:0055:48b6
    vendor_id:          0x02c9
    vendor_part_id:     4129
    hw_ver:             0x0
    board_id:           MT_0000000838
    phys_port_cnt:      1
        port:   1
            state:          PORT_ACTIVE (4)
            max_mtu:        4096 (5)
            active_mtu:     4096 (5)
            sm_lid:         1167
            port_lid:       976
            port_lmc:       0x00
            link_layer:     InfiniBand

hca_id: mlx5_2
    transport:          InfiniBand (0)
    fw_ver:             20.38.1900
    node_guid:          b83f:d203:0091:eb2a
    sys_image_guid:     b83f:d203:0091:eb2a
    vendor_id:          0x02c9
    vendor_part_id:     4123
    hw_ver:             0x0
    board_id:           MT_0000000224
    phys_port_cnt:      1
        port:   1
            state:          PORT_ACTIVE (4)
            max_mtu:        4096 (5)
            active_mtu:     1024 (3)
            sm_lid:         0
            port_lid:       0
            port_lmc:       0x00
            link_layer:     Ethernet

hca_id: mlx5_3
    transport:          InfiniBand (0)
    fw_ver:             20.38.1900
    node_guid:          b83f:d203:0091:eb2b
    sys_image_guid:     b83f:d203:0091:eb2a
    vendor_id:          0x02c9
    vendor_part_id:     4123
    hw_ver:             0x0
    board_id:           MT_0000000224
    phys_port_cnt:      1
        port:   1
            state:          PORT_DOWN (1)
            max_mtu:        4096 (5)
            active_mtu:     1024 (3)
            sm_lid:         0
            port_lid:       0
            port_lmc:       0x00
            link_layer:     Ethernet

hca_id: mlx5_4
    transport:          InfiniBand (0)
    fw_ver:             28.41.1000
    node_guid:          9c63:c003:0055:46fe
    sys_image_guid:     9c63:c003:0055:46fe
    vendor_id:          0x02c9
    vendor_part_id:     4129
    hw_ver:             0x0
    board_id:           MT_0000000838
    phys_port_cnt:      1
        port:   1
            state:          PORT_ACTIVE (4)
            max_mtu:        4096 (5)
            active_mtu:     4096 (5)
            sm_lid:         1167
            port_lid:       981
            port_lmc:       0x00
            link_layer:     InfiniBand

hca_id: mlx5_5
    transport:          InfiniBand (0)
    fw_ver:             28.41.1000
    node_guid:          9c63:c003:005b:1e14
    sys_image_guid:     9c63:c003:005b:1e14
    vendor_id:          0x02c9
    vendor_part_id:     4129
    hw_ver:             0x0
    board_id:           MT_0000000838
    phys_port_cnt:      1
        port:   1
            state:          PORT_ACTIVE (4)
            max_mtu:        4096 (5)
            active_mtu:     4096 (5)
            sm_lid:         1167
            port_lid:       984
            port_lmc:       0x00
            link_layer:     InfiniBand

The issue was resolved by setting NVSHMEM_ENABLE_NIC_PE_MAPPING=1 NVSHMEM_HCA_LIST=mlx5_0:1,mlx5_1:1,mlx5_4:1,mlx5_5:1

The ibv_devinfo output above suggests why this helps: mlx5_2 and mlx5_3 are RoCE ports (link_layer: Ethernet), and mlx5_3 is PORT_DOWN. With more than 2 PEs, NVSHMEM apparently starts handing out those devices as well, and ibv_modify_qp fails with status 22 (EINVAL) when it tries to bring a queue pair up on them. Pinning NVSHMEM_HCA_LIST to the four active InfiniBand ports keeps the IBRC transport off the unusable devices.
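For reference, a sketch of the working invocation (assuming Open MPI, whose -x option forwards environment variables to all ranks):

> mpirun --allow-run-as-root -n 3 -x NVSHMEM_ENABLE_NIC_PE_MAPPING=1 -x NVSHMEM_HCA_LIST=mlx5_0:1,mlx5_1:1,mlx5_4:1,mlx5_5:1 example.out

Running with NVSHMEM_DEBUG=INFO also prints which transport and HCA each PE selects, which is a quick way to verify the mapping.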
