Hello,
In this presentation http://www.cc.gatech.edu/~vetter/keeneland/tutorial-2011-04-14/13-cuda_advmpi_keeneland.pdf
it is stated on slide 17 that with GPUDirect v2 the user does not have to explicitly code the memory transfers from device
to system memory in order to send/receive data with MPI over InfiniBand (“User sees direct transfer”), thanks to UVA, I believe.
So I tried to write this simple program to validate this feature, but it does not work.
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <sys/time.h>
#include <mpi.h>
#define NREPEAT 1
#define NBYTES 10.e6
/*
 * Reports whether this program was compiled as a 64-bit application.
 *
 * The check is based on the pointer width rather than on x86-specific
 * compiler macros, so it also gives the right answer on other 64-bit
 * targets (e.g. aarch64, ppc64le) where those macros are not defined.
 *
 * Returns 1 when pointers are 8 bytes wide, 0 otherwise.
 */
int IsAppBuiltAs64(void) {
    return sizeof(void *) == 8 ? 1 : 0;
}
int main (int argc, char *argv[])
{
int rank, size, n, len;
int result;
void *a_h, *a_d;
struct timeval time[2];
double bandwidth;
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Status status;
int device=0;
MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
MPI_Comm_size (MPI_COMM_WORLD, &size);
MPI_Get_processor_name(hostname, &len);
printf("Process %d is on %s\n", rank, hostname);
/* select device */
if ( rank == 0 ) {
cudaSetDevice(0);
}
else {
cudaSetDevice(0);
}
/* display the unified capabilities of the device */
struct cudaDeviceProp prop;
result = cudaGetDeviceProperties(&prop,device);
if ( result != cudaSuccess ) {
printf("ERROR: %s: cudaGetDeviceProperties failed, error code: %d, which means: %s\n",
hostname, result, cudaGetErrorString(result));
}
printf( "rank %d prop.unifiedAddressing=%d\n", rank, prop.unifiedAddressing );
/* device memory allocation */
result = cudaMalloc( (void **) &a_d, NBYTES);
if ( result != cudaSuccess ) {
printf("ERROR: %s: cudaMalloc failed, error code: %d, which means: %s\n",
hostname, result, cudaGetErrorString(result));
exit(1);
}
printf( "rank %d build as 64 %d\n", rank, IsAppBuiltAs64() );
/* Test MPI send/recv bandwidth. */
MPI_Barrier(MPI_COMM_WORLD);
gettimeofday(&time[0], NULL);
for (n=0; n<NREPEAT; n++)
{
if (rank == 0) {
MPI_Send(a_d, NBYTES/sizeof(int), MPI_INT, 1, 0, MPI_COMM_WORLD);
}
else {
MPI_Recv(a_d, NBYTES/sizeof(int), MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}
}
gettimeofday(&time[1], NULL);
bandwidth = time[1].tv_sec - time[0].tv_sec;
bandwidth += 1.e-6*(time[1].tv_usec - time[0].tv_usec);
bandwidth = NBYTES*NREPEAT/1.e6/bandwidth;
if (rank == 0)
printf("MPI send/recv bandwidth: %f MB/sec\n", bandwidth);
cudaFree(a_d);
MPI_Finalize();
return 0;
}
Here is the console output I obtain:
[xxxx@dhcp1 test_mpi_gpudirect_v2]$ ./runib
Process 1 is on dhcp1
Process 0 is on dhcp2
rank 1 prop.unifiedAddressing=1
rank 0 prop.unifiedAddressing=1
rank 1 build as 64 1
rank 0 build as 64 1
[dhcp2:06536] *** Process received signal ***
[dhcp2:06536] Signal: Segmentation fault (11)
[dhcp2:06536] Signal code: Invalid permissions (2)
[dhcp2:06536] Failing at address: 0x200100000
[dhcp2:06536] [ 0] /lib64/libpthread.so.0 [0x3f8b80eb10]
[dhcp2:06536] [ 1] /lib64/libc.so.6(memcpy+0x15b) [0x3f8ac7c39b]
[dhcp2:06536] [ 2] /usr/mpi/gcc/openmpi-1.4.3/lib64/libmpi.so.0(ompi_convertor_pack+0x12c) [0x2aadcbc4e84c]
[dhcp2:06536] [ 3] /usr/mpi/gcc/openmpi-1.4.3/lib64/openmpi/mca_btl_openib.so [0x2aadcf216c82]
[dhcp2:06536] [ 4] /usr/mpi/gcc/openmpi-1.4.3/lib64/openmpi/mca_pml_ob1.so [0x2aadce5d0935]
[dhcp2:06536] [ 5] /usr/mpi/gcc/openmpi-1.4.3/lib64/openmpi/mca_pml_ob1.so [0x2aadce5c6f90]
[dhcp2:06536] [ 6] /usr/mpi/gcc/openmpi-1.4.3/lib64/libmpi.so.0(PMPI_Send+0x13d) [0x2aadcbc6495d]
[dhcp2:06536] [ 7] ./gpudirect_v2(main+0x199) [0x400e1c]
[dhcp2:06536] [ 8] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3f8ac1d994]
[dhcp2:06536] [ 9] ./gpudirect_v2 [0x400bc9]
[dhcp2:06536] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 6536 on node 192.168.0.1 exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
So, my question is: does the sentence “User sees direct transfer” mean that it is possible to pass a device pointer to MPI functions?
And if so, how is it possible?
Best regards.