Hi, I am trying to build NVSHMEM and use it in my program.
I followed the instructions in the NVSHMEM Installation Guide to build it.
My software versions are:
OpenMPI: 4.1.4
UCX: 1.13.1
CUDA: 11.8
GCC: 8.5.0
CMAKE: 3.26.3
NVSHMEM: 2.5.0-19
NCCL: 2.23
I used the following commands to build NVSHMEM:
export CUDA_HOME=/opt/cuda/11.8
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=/ompi/install
export NVSHMEM_GPUINITIATED_SUPPORT=1
export NVSHMEM_PREFIX=/nvshmem_src_2.5.0-19/install
export NVSHMEM_USE_NCCL=1
export NCCL_HOME=/nccl/build
export GDRCOPY_HOME=/opt/gdrcopy/2.3
cd nvshmem_src_2.5.0-19
make -j install
After compilation, I got output
which suggests that the build succeeded.
Then I used nvcc to compile the official example:
#include <stdio.h>
#include <cuda.h>
#include <nvshmem.h>
#include <nvshmemx.h>
/*
 * Ring-shift kernel: the calling PE writes its own PE index into
 * `destination` on the next PE, wrapping around after the last PE.
 *
 * destination: address passed as the remote target of nvshmem_int_p;
 *              the host code in this example obtains it from nvshmem_malloc.
 *
 * Every thread that runs this kernel issues the put; the example launches
 * it with a single thread (<<<1, 1>>>).
 */
__global__ void simple_shift(int *destination) {
    const int self  = nvshmem_my_pe();
    const int total = nvshmem_n_pes();
    const int right = (self + 1) % total;  // neighbour in the ring

    nvshmem_int_p(destination, self, right);
}
// Evaluates a CUDA runtime call; on failure prints the CUDA error string with
// file and line to stderr and makes the enclosing main() return 1.
// Note: the early return skips nvshmem_finalize(); it is a diagnostic exit path.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(err_));                            \
            return 1;                                                     \
        }                                                                 \
    } while (0)

/*
 * Host driver for the ring-shift example.
 *
 * Initializes NVSHMEM, selects the GPU using this PE's index within the
 * node team, launches simple_shift with one thread on a dedicated stream,
 * runs a stream-ordered barrier, copies the received integer back to the
 * host and prints it.
 *
 * Returns 0 on success, 1 if a CUDA runtime call or the symmetric
 * allocation fails.
 */
int main(void) {
    int mype_node, msg;
    cudaStream_t stream;

    nvshmem_init();

    // Use the PE's index in the node-local team as the CUDA device ordinal.
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
    CUDA_CHECK(cudaSetDevice(mype_node));
    CUDA_CHECK(cudaStreamCreate(&stream));

    int *destination = (int *) nvshmem_malloc(sizeof(int));
    if (destination == NULL) {
        fprintf(stderr, "nvshmem_malloc failed\n");
        return 1;
    }

    simple_shift<<<1, 1, 0, stream>>>(destination);
    // A kernel launch returns no status itself; query the launch error explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Barrier is enqueued on the same stream, so it runs after the kernel
    // and before the copy below.
    nvshmemx_barrier_all_on_stream(stream);
    CUDA_CHECK(cudaMemcpyAsync(&msg, destination, sizeof(int),
                               cudaMemcpyDeviceToHost, stream));
    // msg is only valid on the host once the stream has drained.
    CUDA_CHECK(cudaStreamSynchronize(stream));

    printf("%d: received message %d\n", nvshmem_my_pe(), msg);

    nvshmem_free(destination);
    CUDA_CHECK(cudaStreamDestroy(stream));  // release the stream created above
    nvshmem_finalize();
    return 0;
}
# Compile and link the example in a single nvcc invocation.
# NOTE(review): this command has no -rdc=true. The kernel calls NVSHMEM device
# functions (nvshmem_my_pe, nvshmem_n_pes, nvshmem_int_p) that are defined in the
# NVSHMEM library rather than in this file, so relocatable device code is
# presumably required; NVSHMEM's documented compile line also links -lcuda.
# This is the likely cause of the ptxas "Unresolved extern function" error
# reported below - confirm against the NVSHMEM documentation.
nvcc nvShmemTest.cu -o nvShmemTest -arch=compute_80 -code=sm_80 -L/ompi/install/lib -I/ompi/install/include -L/nccl/build/lib -I/nccl/build/include -I/nvshmem_src_2.5.0-19/install/include -L/nvshmem_src_2.5.0-19/install/lib -lmpi -lnccl -lnvshmem
But it gives the following error:
ptxas fatal : Unresolved extern function 'nvshmem_my_pe'
It looks like a link error, but I have configured the correct include and library paths for NVSHMEM.
Any help is appreciated!!
