I am having some trouble setting up the inter-node NVSHMEM environment and the RDMA environment.
I installed nv_peer_mem and gdrcopy on catalyst-cluster.cs.cmu.edu following this doc (NVSHMEM Installation Guide — nvshmem 2.10.1 documentation (nvidia.com)).
Both lsmod | grep nv_peer_mem and depmod -n | grep -i gdrdrv produce output, which suggests nv_peer_mem and gdrcopy were installed successfully.
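As an extra check beyond lsmod, I can run a tiny program against the gdrcopy API to confirm the gdrdrv device is actually usable from user space (this is only a sketch; the build command and the -lgdrapi library name are assumptions about a default gdrcopy install):
/* gdr_check.c -- minimal gdrcopy sanity check (sketch).
 * Build (assumed): gcc gdr_check.c -o gdr_check -lgdrapi */
#include <stdio.h>
#include "gdrapi.h"

int main(void) {
    gdr_t g = gdr_open();   /* opens the gdrdrv device under the hood */
    if (g == NULL) {
        fprintf(stderr, "gdr_open failed: is gdrdrv loaded and its device node accessible?\n");
        return 1;
    }
    printf("gdr_open succeeded\n");
    gdr_close(g);
    return 0;
}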
I also wrote a demo to test the RDMA connection: one node acts as the server and the other as the client, and the log indicates the RDMA connection is OK. However, if I run ibping -S, the output is:
ibwarn: [446273] mad_rpc_open_port: can't open UMAD port ((null):0)
ibping: iberror: failed: Failed to open '(null)' port '0'
If I run sudo ibping -S, the output is:
ibwarn: [453537] _do_madrpc: recv failed: Connection timed out
ibwarn: [453537] mad_rpc_rmpp: _do_madrpc failed; dport (Lid 172)
If I run rdma-server on one node and rdma-client -s ip on the other node, there is no problem.
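If I understand correctly, ibping goes through the MAD/umad interface while rdma-server/rdma-client (and NVSHMEM's IB transport) go through verbs, so to check the verbs path as a non-root user I also use a tiny enumeration program (a sketch, assuming the libibverbs development headers are installed):
/* verbs_list.c -- list RDMA devices visible through libibverbs (sketch).
 * Build (assumed): gcc verbs_list.c -o verbs_list -libverbs */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void) {
    int n = 0;
    struct ibv_device **devs = ibv_get_device_list(&n);
    if (devs == NULL || n == 0) {
        fprintf(stderr, "no verbs devices found (n=%d)\n", n);
        return 1;
    }
    for (int i = 0; i < n; ++i)
        printf("device %d: %s\n", i, ibv_get_device_name(devs[i]));
    ibv_free_device_list(devs);
    return 0;
}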
Then I tried to run my code to test inter-node NVSHMEM. The command is:
nvshmrun -np 2 --host ip1,ip2 ./worker > nvshmem.log 2>&1   # change ip1,ip2 to your IP addresses
The code is:
#include <stdio.h>
#include "mpi.h"
#include "nvshmem.h"
#include "nvshmemx.h"
#include <unistd.h>

#define CUDA_CHECK(stmt)                                              \
    do {                                                              \
        cudaError_t result = (stmt);                                  \
        if (cudaSuccess != result) {                                  \
            fprintf(stderr, "[%s:%d] CUDA failed with %s \n",         \
                    __FILE__, __LINE__, cudaGetErrorString(result));  \
            exit(-1);                                                 \
        }                                                             \
    } while (0)

// Each PE writes its own ID into the symmetric buffer of the next PE.
__global__ void simple_shift(int *destination) {
    int mype = nvshmem_my_pe();
    int npes = nvshmem_n_pes();
    int peer = (mype + 1) % npes;
    nvshmem_int_p(destination, mype, peer);
}

int main(int argc, char *argv[]) {
    int mype_node, msg;
    cudaStream_t stream;
    int rank, nranks;
    char hostname[256];
    gethostname(hostname, 256);

    // Bootstrap NVSHMEM on top of MPI.
    MPI_Comm mpi_comm = MPI_COMM_WORLD;
    nvshmemx_init_attr_t attr;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
    printf("Rank: %d, Hostname: %s and nranks:%d \n", rank, hostname, nranks);
    attr.mpi_comm = &mpi_comm;
    nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);

    // One GPU per PE: select the device by the PE's rank within the node.
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
    CUDA_CHECK(cudaSetDevice(mype_node));
    CUDA_CHECK(cudaStreamCreate(&stream));

    // Symmetric allocation; each PE receives the ID of its left neighbor.
    int *destination = (int *) nvshmem_malloc(sizeof(int));
    simple_shift<<<1, 1, 0, stream>>>(destination);
    nvshmemx_barrier_all_on_stream(stream);
    CUDA_CHECK(cudaMemcpyAsync(&msg, destination, sizeof(int),
                               cudaMemcpyDeviceToHost, stream));
    CUDA_CHECK(cudaStreamSynchronize(stream));
    printf("%d: received message %d and hostname:%s\n", nvshmem_my_pe(), msg, hostname);

    nvshmem_free(destination);
    nvshmem_fence();
    nvshmem_finalize();
    MPI_Finalize();
    return 0;
}
The error output is:
/home/xxxx/nvshmem_src_2.10.1-3/src/modules/transport/common/transport_ib_common.cpp:84: NULL value mem registration failed
/home/xxxx/nvshmem_src_2.10.1-3/src/modules/transport/ibrc/ibrc.cpp:500: non-zero status: 2 Unable to register memory handle.
[yyyy:446187:0:446187] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x10)
/home/xxxx/nvshmem_src_2.10.1-3/src/modules/transport/common/transport_ib_common.cpp:84: NULL value mem registration failed
/home/xxxx/nvshmem_src_2.10.1-3/src/modules/transport/ibrc/ibrc.cpp:500: non-zero status: 2 Unable to register memory handle.
[yyyy:446188:0:446188] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x10)
Is there any problem with my NVSHMEM or RDMA setup?
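For reference, since the failure is in memory registration (transport_ib_common.cpp:84 / ibrc.cpp:500), here is a minimal sketch I plan to use to check whether registering cudaMalloc'd memory with ibv_reg_mr works at all outside NVSHMEM (the device index 0, the 1 MiB buffer size, and the build command are my own arbitrary choices):
// reg_gpu_mr.cu -- try to register GPU memory with ibv_reg_mr (sketch).
// Build (assumed): nvcc reg_gpu_mr.cu -o reg_gpu_mr -libverbs
#include <stdio.h>
#include <cuda_runtime.h>
#include <infiniband/verbs.h>

int main(void) {
    int n = 0;
    struct ibv_device **devs = ibv_get_device_list(&n);
    if (!devs || n == 0) { fprintf(stderr, "no verbs devices\n"); return 1; }

    struct ibv_context *ctx = ibv_open_device(devs[0]);   // first HCA, arbitrary
    if (!ctx) { fprintf(stderr, "ibv_open_device failed\n"); return 1; }

    struct ibv_pd *pd = ibv_alloc_pd(ctx);
    if (!pd) { fprintf(stderr, "ibv_alloc_pd failed\n"); return 1; }

    void *gpu_buf = NULL;
    size_t len = 1 << 20;                                  // 1 MiB, arbitrary
    if (cudaMalloc(&gpu_buf, len) != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed\n");
        return 1;
    }

    // Registering device memory is the path that needs GPUDirect RDMA
    // (nv_peer_mem) support in the kernel.
    int ok = 0;
    struct ibv_mr *mr = ibv_reg_mr(pd, gpu_buf, len,
                                   IBV_ACCESS_LOCAL_WRITE |
                                   IBV_ACCESS_REMOTE_READ |
                                   IBV_ACCESS_REMOTE_WRITE);
    if (!mr) {
        perror("ibv_reg_mr on GPU memory failed");
    } else {
        printf("ibv_reg_mr on GPU memory succeeded (lkey=0x%x)\n", mr->lkey);
        ibv_dereg_mr(mr);
        ok = 1;
    }

    cudaFree(gpu_buf);
    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    return ok ? 0 : 1;
}
If this registration fails, I would suspect the GPUDirect RDMA (nv_peer_mem) path rather than NVSHMEM itself.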