NVSHMEM program fails to initialize

Hi All,

I am trying to run the sample communication ring program using nvshmem. Here is the code:

#include <stdio.h>
#include <cuda.h>
#include <nvshmem.h>
#include <nvshmemx.h>

/*
 * Ring-shift kernel: each PE writes its own PE number into `destination`
 * on the next PE in the ring (the last PE wraps around to PE 0).
 *
 * destination: address passed to nvshmem_int_p as the remote target; the
 *              host code in this file obtains it from nvshmem_malloc.
 *
 * Launched by main() with a single block of a single thread, so exactly
 * one put is issued per PE.
 */
__global__ void simple_shift(int *destination) {
    int mype = nvshmem_my_pe();
    int npes = nvshmem_n_pes();
    int peer = (mype + 1) % npes;  /* right-hand neighbour, with wrap-around */

    /* Single-element put: store the value `mype` at `destination` on `peer`. */
    nvshmem_int_p(destination, mype, peer);
}

/*
 * Evaluate a CUDA runtime call; on failure print the file, line and CUDA
 * error string to stderr and make the enclosing function return 1.
 * Intended for use inside main() only (it expands to a `return`).
 */
#define CUDA_CHECK_OR_RETURN(call)                                          \
    do {                                                                    \
        cudaError_t cuda_status_ = (call);                                  \
        if (cuda_status_ != cudaSuccess) {                                  \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(cuda_status_));            \
            return 1;                                                       \
        }                                                                   \
    } while (0)

/*
 * Host driver for the ring-shift sample.
 *
 * Initializes NVSHMEM, selects a GPU using this PE's index within the node
 * team, launches simple_shift on a private stream, waits for all PEs at a
 * stream-ordered barrier, copies the received integer back to the host and
 * prints it.
 *
 * Returns 0 on success, 1 if a CUDA call or the symmetric allocation fails.
 */
int main(void) {
    int mype_node;
    int msg = -1;  /* defined value in case the copy never lands */
    cudaStream_t stream;

    nvshmem_init();

    /* Use the PE index within the node team as the CUDA device ordinal. */
    mype_node = nvshmem_team_my_pe(NVSHMEMX_TEAM_NODE);
    CUDA_CHECK_OR_RETURN(cudaSetDevice(mype_node));
    CUDA_CHECK_OR_RETURN(cudaStreamCreate(&stream));

    int *destination = (int *) nvshmem_malloc(sizeof(int));
    if (destination == NULL) {
        fprintf(stderr, "nvshmem_malloc failed for %zu bytes\n", sizeof(int));
        return 1;
    }

    simple_shift<<<1, 1, 0, stream>>>(destination);
    /* A kernel launch returns nothing; launch errors are read back here. */
    CUDA_CHECK_OR_RETURN(cudaGetLastError());

    /* Barrier enqueued on the same stream, so it is ordered after the kernel. */
    nvshmemx_barrier_all_on_stream(stream);
    CUDA_CHECK_OR_RETURN(cudaMemcpyAsync(&msg, destination, sizeof(int),
                                         cudaMemcpyDeviceToHost, stream));

    /* msg is only valid once everything queued on the stream has finished. */
    CUDA_CHECK_OR_RETURN(cudaStreamSynchronize(stream));
    printf("%d: received message %d\n", nvshmem_my_pe(), msg);

    nvshmem_free(destination);
    CUDA_CHECK_OR_RETURN(cudaStreamDestroy(stream));
    nvshmem_finalize();
    return 0;
}

The code is saved in a file named test.cu.
Here is the Makefile:

# Build rules for the NVSHMEM ring-shift sample (test.cu -> test-nvshmem).
# NVSHMEM_HOME, MPI_HOME and CUDA_HOME are referenced below but not set here.
# NOTE: every recipe line must begin with a TAB character, not spaces.
NVCC=nvcc
HC=icpc
GENCODE_SM60 := -gencode arch=compute_60,code=compute_60
GENCODE_SM70 := -gencode arch=compute_70,code=sm_70 -gencode arch=compute_70,code=compute_70

# Only the SM70 code-generation flags are used for the build.
GENCODE_FLAGS := $(GENCODE_SM70)

NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -lnvToolsExt $(GENCODE_FLAGS) -std=c++11 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt

# These targets do not create files named after themselves.
.PHONY: clean memcheck run

# Compile to an object file, then link the executable with nvcc.
test-nvshmem: Makefile test.cu
	$(NVCC) $(NVCC_FLAGS) test.cu -c -o test.o
	$(NVCC) $(GENCODE_FLAGS) test.o -o test-nvshmem $(NVCC_LDFLAGS)

clean:
	rm -f test-nvshmem test.o

memcheck: test-nvshmem
	cuda-memcheck ./test-nvshmem

run: test-nvshmem
	./test-nvshmem

The NVSHMEM version is 1.1.3.

Thanks for your help.