Hello, I have 3 nodes, and the ib_send_bw test does not work:
ib_send_bw -d mlx4_0 -i 1 -F --report_gbits 10.0.0.5
Couldn’t connect to 10.0.0.5:18515
Unable to open file descriptor for socket connection Unable to init the socket connection.
The other tests run successfully:
udaddy -s 10.0.0.5
udaddy: starting client
udaddy: connecting
initiating data transfers
receiving data transfers
data transfers complete
test complete
return status 0
[root@filo-server2 ~]# rdma_client -s 10.0.0.5
rdma_client: start
rdma_client: end 0
[root@filo-server2 ~]# ib_send_bw -d mlx4_0 -i 1 -F --report_gbits 10.0.0.5
Couldn’t connect to 10.0.0.5:18515
Unable to open file descriptor for socket connection Unable to init the socket connection
[root@filo-server2 ~]# rping -c -a 10.0.0.5 -C 10 -v
ping data: rdma-ping-0: ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqr
ping data: rdma-ping-1: BCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrs
ping data: rdma-ping-2: CDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrst
ping data: rdma-ping-3: DEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstu
ping data: rdma-ping-4: EFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuv
ping data: rdma-ping-5: FGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvw
ping data: rdma-ping-6: GHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwx
ping data: rdma-ping-7: HIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxy
ping data: rdma-ping-8: IJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz
ping data: rdma-ping-9: JKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyzA
client DISCONNECT EVENT…
[root@filo-server2 ~]# ucmatose -s 10.0.0.5
cmatose: starting client
cmatose: connecting
receiving data transfers
sending replies
data transfers complete
test complete
return status 0
sminfo on node1:
sminfo: sm lid 3 sm guid 0x2c9030004a773, activity count 949 priority 14 state 3 SMINFO_MASTER
ibstat
CA ‘mlx4_0’
CA type: MT26428
Number of ports: 2
Firmware version: 2.9.1000
Hardware version: a0
Node GUID: 0x0002c9030004a772
System image GUID: 0x0002c9030004a775
Port 1:
State: Active
Physical state: LinkUp
Rate: 40
Base lid: 3
LMC: 0
SM lid: 3
Capability mask: 0x0251086a
Port GUID: 0x0002c9030004a773
Link layer: InfiniBand
Port 2:
State: Down
Physical state: Polling
Rate: 10
Base lid: 0
LMC: 0
SM lid: 0
Capability mask: 0x02510868
Port GUID: 0x0002c9030004a774
Link layer: InfiniBand
sminfo on node2:
sminfo: sm lid 3 sm guid 0x2c9030004a773, activity count 960 priority 14 state 3 SMINFO_MASTER
ibstat
CA ‘mlx4_0’
CA type: MT26428
Number of ports: 2
Firmware version: 2.9.1000
Hardware version: a0
Node GUID: 0x0002c90300048974
System image GUID: 0x0002c90300048977
Port 1:
State: Active
Physical state: LinkUp
Rate: 40
Base lid: 4
LMC: 0
SM lid: 3
Capability mask: 0x0251086a
Port GUID: 0x0002c90300048975
Link layer: InfiniBand
Port 2:
State: Down
Physical state: Polling
Rate: 10
Base lid: 0
LMC: 0
SM lid: 0
Capability mask: 0x02510868
Port GUID: 0x0002c90300048976
Link layer: InfiniBand
ip a l ib0
5: ib0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 65520 qdisc mq state UP qlen 1024
link/infiniband a0:00:02:08:fe:80:00:00:00:00:00:00:00:02:c9:03:00:04:a7:73 brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
inet 10.0.0.5/24 brd 10.0.0.255 scope global ib0
valid_lft forever preferred_lft forever
inet6 fe80::202:c903:4:a773/64 scope link
valid_lft forever preferred_lft forever
ip a l ib0
5: ib0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 65520 qdisc mq state UP qlen 1024
link/infiniband a0:00:02:08:fe:80:00:00:00:00:00:00:00:02:c9:03:00:04:89:75 brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
inet 10.0.0.3/24 brd 10.0.0.255 scope global ib0
valid_lft forever preferred_lft forever
inet6 fe80::202:c903:4:8975/64 scope link
valid_lft forever preferred_lft forever
cat /sys/class/net/ib0/mode returns "connected" on all nodes.
cat /etc/infiniband/openib.conf
Start HCA driver upon boot
ONBOOT=yes
Node description
NODE_DESC=$(hostname -s)
Wait for NODE_DESC_TIME_BEFORE_UPDATE sec before node_desc update
NODE_DESC_TIME_BEFORE_UPDATE=20
Max time in seconds to wait for node’s hostname to be set
NODE_DESC_UPDATE_TIMEOUT=120
Seconds to sleep after openibd start finished and before releasing the shell
POST_START_DELAY=0
Run /usr/sbin/mlnx_affinity
RUN_AFFINITY_TUNER=no
Run /usr/sbin/mlnx_tune
RUN_MLNX_TUNE=no
Increase ib_mad thread priority
RENICE_IB_MAD=no
Run sysctl performance tuning script
RUN_SYSCTL=no
Load UMAD module
UMAD_LOAD=yes
Load UVERBS module
UVERBS_LOAD=yes
Load UCM module
UCM_LOAD=yes
Load RDMA_CM module
RDMA_CM_LOAD=yes
Load RDMA_UCM module
RDMA_UCM_LOAD=yes
Load MTHCA
MTHCA_LOAD=no
Load MLX4 modules
MLX4_LOAD=yes
Load MLX4_EN module
MLX4_EN_LOAD=yes
Load MLX4_VNIC module
MLX4_VNIC_LOAD=no
Load MLX5 modules
MLX5_LOAD=yes
Load CXGB3 modules
CXGB3_LOAD=no
Load CXGB4 modules
CXGB4_LOAD=no
Load NES modules
NES_LOAD=no
Load QIB
QIB_LOAD=no
QIB QME BP VER
QIB_QME_BPVER=1
Load IPoIB
IPOIB_LOAD=yes
Enable IPoIB Connected Mode
#SET_IPOIB_CM=auto
SET_IPOIB_CM=yes
Load IPATH
IPATH_LOAD=no
Load SDP module
SDP_LOAD=no
Load SRP module
SRP_LOAD=no
Load RDS module
RDS_LOAD=no
Load E_IPoIB
E_IPOIB_LOAD=no
Enable SRP High Availability daemon
SRPHA_ENABLE=no
SRP_DAEMON_ENABLE=no
Run Firmware update utility upon system boot in order to update Firmware for newly installed devices
RUN_FW_UPDATER_ONBOOT=no
IPOIB_MTU=65520
How can I solve this? Thank you.