I installed nvshmem_src_2.19 with the following commands:
# Install locations for NVSHMEM and UCX.
# "$HOME" is used instead of "~": the shell expands a tilde only at the start
# of a word or in a variable assignment, so an argument such as
# -DNVSHMEM_PREFIX=~/nvshmem would hand cmake a literal "~".
export NVSHMEM_PREFIX="$HOME/nvshmem"
export UCX_HOME="$HOME/UCX"

# Configure the build from a directory below the source tree (the trailing
# ".." is the source path), requesting IBRC, UCX, IBGDA and MPI (Open MPI)
# support. The install prefix reuses the exported variable so both stay in sync.
cmake \
  -DNVSHMEM_PREFIX="$NVSHMEM_PREFIX" \
  -DNVSHMEM_IBRC_SUPPORT=1 \
  -DNVSHMEM_UCX_SUPPORT=1 \
  -DNVSHMEM_IBGDA_SUPPORT=1 \
  -DNVSHMEM_MPI_SUPPORT=1 \
  -DNVSHMEM_MPI_IS_OMPI=1 \
  ..
My code is below:
#include <stdio.h>
#include "mpi.h"
#include "nvshmem.h"
#include "nvshmemx.h"
#include <unistd.h>
/*
 * CUDA_CHECK(stmt): evaluate a CUDA runtime call exactly once and, if it did
 * not return cudaSuccess, print the source location and the CUDA error string
 * to stderr and terminate the process with exit(-1).
 *
 * The status is stored under a deliberately unusual identifier so that a
 * `stmt` which itself mentions a variable named `result` is not captured by
 * the macro's own local. The do { ... } while (0) wrapper makes the macro
 * behave as a single statement after an unbraced `if`.
 */
#define CUDA_CHECK(stmt)                                          \
    do {                                                          \
        cudaError_t cuda_check_status_ = (stmt);                  \
        if (cuda_check_status_ != cudaSuccess) {                  \
            fprintf(stderr, "[%s:%d] CUDA failed with %s \n",     \
                    __FILE__, __LINE__,                           \
                    cudaGetErrorString(cuda_check_status_));      \
            exit(-1);                                             \
        }                                                         \
    } while (0)
/*
 * Ring-shift kernel: calls nvshmem_int_p with this PE's own id as the value
 * and the next PE in ring order, (my id + 1) modulo the PE count, as the
 * target of the store into `destination`.
 *
 * destination: pointer handed to nvshmem_int_p as the store location
 *              (the host code in this file obtains it from nvshmem_malloc).
 */
__global__ void simple_shift(int *destination) {
    const int self = nvshmem_my_pe();
    const int right_neighbor = (self + 1) % nvshmem_n_pes();

    nvshmem_int_p(destination, self, right_neighbor);
}
/*
 * Ring-shift demo driver.
 *
 * Initializes MPI, bootstraps NVSHMEM from MPI_COMM_WORLD, selects the CUDA
 * device whose index equals this PE's index in NVSHMEMX_TEAM_NODE, launches
 * simple_shift on a stream, and prints the integer found in the symmetric
 * `destination` buffer after a stream-ordered barrier.
 *
 * Returns 0 on success. CUDA failures and a failed symmetric allocation
 * terminate the process with exit(-1).
 */
int main (int argc, char *argv[]) {
    int mype_node;
    int msg = -1;  /* defined value even if it is ever printed before the copy lands */
    int rank;
    cudaStream_t stream;
    char hostname[256];
    MPI_Comm mpi_comm = MPI_COMM_WORLD;
    /* NOTE(review): newer NVSHMEM releases appear to provide
     * NVSHMEMX_INIT_ATTR_INITIALIZER for this struct - confirm whether the
     * installed version expects it. */
    nvshmemx_init_attr_t attr;

    /* gethostname() can fail, and on truncation it is not guaranteed to
     * NUL-terminate the buffer, so handle both before printing with %s. */
    if (gethostname(hostname, sizeof(hostname)) != 0) {
        snprintf(hostname, sizeof(hostname), "unknown");
    }
    hostname[sizeof(hostname) - 1] = '\0';

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("Rank: %d, Hostname: %s\n", rank, hostname);

    /* Bootstrap NVSHMEM on top of the existing MPI communicator. */
    attr.mpi_comm = &mpi_comm;
    nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);

    /* Use the PE's index within its node as the CUDA device ordinal. */
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
    CUDA_CHECK(cudaSetDevice(mype_node));
    CUDA_CHECK(cudaStreamCreate(&stream));

    int *destination = (int *) nvshmem_malloc (sizeof(int));
    if (destination == NULL) {
        /* Do not launch the kernel on a null symmetric pointer. */
        fprintf(stderr, "[%s:%d] nvshmem_malloc failed\n", __FILE__, __LINE__);
        exit(-1);
    }

    simple_shift<<<1, 1, 0, stream>>>(destination);
    /* A bad launch configuration is only reported through cudaGetLastError(). */
    CUDA_CHECK(cudaGetLastError());

    /* Stream-ordered barrier across all PEs, enqueued after the kernel and
     * before the copy back to the host. */
    nvshmemx_barrier_all_on_stream(stream);
    CUDA_CHECK(cudaMemcpyAsync(&msg, destination, sizeof(int),
                               cudaMemcpyDeviceToHost, stream));
    CUDA_CHECK(cudaStreamSynchronize(stream));
    printf("%d: received message %d and hostname:%s\n", nvshmem_my_pe(), msg, hostname);

    /* Release resources in reverse order of acquisition. */
    nvshmem_free(destination);
    CUDA_CHECK(cudaStreamDestroy(stream));
    nvshmem_finalize();
    MPI_Finalize();
    return 0;
}
My execution command is:
# Enable TRACE-level NVSHMEM debug output for all subsystems.
export NVSHMEM_DEBUG=TRACE NVSHMEM_DEBUG_SUBSYS=ALL

# Start ./worker with -n 2 and -ppn 1 on hosts ip1 and ip2; stdout and stderr
# are both written to nvshmem.log.
nvshmrun -n 2 -ppn 1 --host ip1:1,ip2:1 \
  ./worker >nvshmem.log 2>&1
My log is:
Rank: 1, Hostname: xxxxx-0-11
Rank: 0, Hostname: xxxxx-0-9
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO host name: xxxxx-0-9 hash 14953616763125465901
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO host name: xxxxx-0-11 hash 13854007266692032502
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO PE distribution has been identified as NVSHMEMI_PE_DIST_ROUNDROBIN
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO PE 1 (process) affinity to 128 CPUs:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
126 127
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO cudaDriverVersion 12030
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO NVSHMEM symmetric heap kind = DEVICE selected
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:816: non-zero status: 5 xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO nvshmemi_common_init failed, continuing
nvshmem get cucontext failed
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO PE distribution has been identified as NVSHMEMI_PE_DIST_ROUNDROBIN
NVSHMEM configuration:
CUDA API 12010
CUDA Runtime 12020
CUDA Driver 12030
Build Timestamp Mar 15 2024 08:32:29
Build Variables
NVSHMEM_DEBUG=OFF NVSHMEM_DEVEL=OFF NVSHMEM_DEFAULT_PMI2=OFF
NVSHMEM_DEFAULT_PMIX=OFF NVSHMEM_DEFAULT_UCX=OFF NVSHMEM_DISABLE_COLL_POLL=ON
NVSHMEM_ENABLE_ALL_DEVICE_INLINING=OFF NVSHMEM_ENV_ALL=OFF
NVSHMEM_GPU_COLL_USE_LDST=OFF NVSHMEM_IBGDA_SUPPORT=1
NVSHMEM_IBGDA_SUPPORT_GPUMEM_ONLY=OFF NVSHMEM_IBDEVX_SUPPORT=OFF
NVSHMEM_IBRC_SUPPORT=0 NVSHMEM_LIBFABRIC_SUPPORT=OFF NVSHMEM_MPI_SUPPORT=1
NVSHMEM_NVTX=ON NVSHMEM_PMIX_SUPPORT=OFF NVSHMEM_SHMEM_SUPPORT=OFF
NVSHMEM_TEST_STATIC_LIB=OFF NVSHMEM_TIMEOUT_DEVICE_POLLING=OFF NVSHMEM_TRACE=OFF
NVSHMEM_UCX_SUPPORT=1 NVSHMEM_USE_DLMALLOC=OFF NVSHMEM_USE_NCCL=OFF
NVSHMEM_USE_GDRCOPY=ON NVSHMEM_VERBOSE=OFF CUDA_HOME=/usr/local/cuda
GDRCOPY_HOME=/usr/local/gdrdrv LIBFABRIC_HOME=/usr/local/libfabric
MPI_HOME=/usr/local/ompi NCCL_HOME=/usr/local/nccl
NVSHMEM_PREFIX=/home/yyyy/nvshmem PMIX_HOME=/usr SHMEM_HOME=/usr/local/ompi
UCX_HOME=/home/yyyy/UCX
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO PE 0 (process) affinity to 128 CPUs:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
126 127
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO cudaDriverVersion 12030
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO NVSHMEM symmetric heap kind = DEVICE selected
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:816: non-zero status: 5 nvshmem get cucontext failed
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO nvshmemi_common_init failed, continuing
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO NVSHMEM symmetric heap kind = DEVICE selected
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO [1] nvshmemi_get_cucontext->cuCtxSynchronize->CUDA_SUCCESS) my_stream (nil)
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO in get_cucontext, queried and saved context for device: 0 context: 0x43bc7e0
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO [1] nvshmemi_get_cucontext->cuCtxGetDevice->0(CUDA_ERROR_INVALID_CONTEXT 201) cuStreamCreateWithPriority my_stream 0x4efbcb0
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO NVSHMEM symmetric heap kind = DEVICE selected
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0] nvshmemi_get_cucontext->cuCtxSynchronize->CUDA_SUCCESS) my_stream (nil)
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO in get_cucontext, queried and saved context for device: 0 context: 0x43a5b60
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0] nvshmemi_get_cucontext->cuCtxGetDevice->0(CUDA_ERROR_INVALID_CONTEXT 201) cuStreamCreateWithPriority my_stream 0x4ee61a0
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO host name: xxxxx-0-11 hash 13854007266692032502
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO host name: xxxxx-0-9 hash 14953616763125465901
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO nvshmemi_setup_local_heap, heapextra = 285225000
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0] mspace ptr: 0x2e18100
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO host name: xxxxx-0-9 hash 14953616763125465901
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO nvshmemi_setup_local_heap, heapextra = 285225000
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO [1] mspace ptr: 0x2e18100
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO host name: xxxxx-0-11 hash 13854007266692032502
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO UCX transport skipped in favor of: ibrc
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO IBGDA Disabled by the environment.
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0x57fcc40] ndev 4 pcie_devid 0 cudevice 0 peer host hash cf85eb37e96b832d p2p host hash cf85eb37e96b832d
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0] reach 15 to peer 0 over transport 0
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0x57fcc40] ndev 4 pcie_devid 0 cudevice 0 peer host hash c043523516dbe7f6 p2p host hash cf85eb37e96b832d
xxxxx-0-9:2682207:2682207 [0] NVSHMEM INFO [0] reach 0 to peer 1 over transport 0
/home/yyyy/nvshmem_src_2.10.1-3/src/host/topo/topo.cpp:420: [GPU 0] Peer GPU 1 is not accessible, exiting ...
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:843: non-zero status: 3 building transport map failed
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:nvshmemi_check_state_and_init:933: nvshmem initialization failed, exiting
/home/yyyy/nvshmem_src_2.10.1-3/src/util/cs.cpp:23: non-zero status: 16: File exists, exiting... mutex destroy failed
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO UCX transport skipped in favor of: ibrc
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO IBGDA Disabled by the environment.
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO [0x5812750] ndev 4 pcie_devid 0 cudevice 0 peer host hash cf85eb37e96b832d p2p host hash c043523516dbe7f6
xxxxx-0-11:3667279:3667279 [0] NVSHMEM INFO [1] reach 0 to peer 0 over transport 0
/home/yyyy/nvshmem_src_2.10.1-3/src/host/topo/topo.cpp:420: [GPU 1] Peer GPU 0 is not accessible, exiting ...
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:843: non-zero status: 3 building transport map failed
/home/yyyy/nvshmem_src_2.10.1-3/src/host/init/init.cu:nvshmemi_check_state_and_init:933: nvshmem initialization failed, exiting
/home/yyyy/nvshmem_src_2.10.1-3/src/util/cs.cpp:23: non-zero status: 16: File exists, exiting... mutex destroy failed