Cuda mpi fortran

When I use CUDA Fortran with MPI and build `target`, the link step always reports that it cannot find `cudagetdevicecount_` and `cudagetdeviceproperties_`. Please help me solve it.
! Prints the number of CUDA devices reported by cudaGetDeviceCount and then,
! for each device index 0 .. nDevices-1, a selection of fields read from the
! cudaDeviceProp record filled in by cudaGetDeviceProperties.
!
! NOTE(review): as posted, no `use cudafor` is in scope for this subroutine
! and it is not wrapped in a module.  The link errors reported for
! `cudagetdevicecount_` and `cudagetdeviceproperties_` are consistent with the
! two CUDA calls being compiled as plain external functions.
subroutine get_gpu_attribute()

            type(cudaDeviceProp) :: prop
            ! Initialising nDevices in its declaration gives it the SAVE
            ! attribute, so the "=0" is applied once, not on every call.
            integer ::nDevices=0,i,istat
            ! istat receives the return code but is never inspected below.
            istat=cudaGetDeviceCount(nDevices)
            ! The comma after the control list is a compiler extension.
            write(*,"('the gpu num is', i3)"), nDevices
            ! Device indices are zero-based.
            do i=0,nDevices-1
                    write(*,"('device number: ',i0)") i
                    istat=cudaGetDeviceProperties(prop,i)
                    write(*,"('Compute Capability :',i0,'.',i0)") prop%major,prop%minor
                    write(*,"('number of multiprocessors :',i0)") prop%multiProcessorCount
                    write(*,"('Max Threads per Multiprocessors :',i0)") prop%maxThreadsPerMultiprocessor
                    ! Integer division by 1024**3; the label says the result is in GB.
                    write(*,"('Global Memory (GB)',i5)") prop%totalGlobalMem/1024**3
                    write(*,"('Execution Configuration Limits')")
                    ! Three values are printed, separated by 'x'.
                    write(*,"('Max Grid Dims :'2(i0,'x'),i0)") prop%maxGridSize
                    write(*,"('Max Block Dims :'2(i0,'x'),i0)") prop%maxThreadsDim
                    write(*,"('Max Threads per Block:',i0)") prop%maxThreadsPerBlock
            end do

end subroutine get_gpu_attribute
! MPI driver: initialises MPI, prints this process's rank and processor name,
! calls the GPU attribute report, then finalises MPI.
program main
use mpi
use cudafor
use gModule
character * (MPI_MAX_PROCESSOR_NAME) processor_name
! numprocs and namelen are filled in by the MPI calls below but not used
! afterwards; istat is declared but never referenced.
integer :: myid , numprocs,namelen,rc,ierr,istat
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world,myid,ierr)
call mpi_comm_size(mpi_comm_world, numprocs,ierr)
call mpi_get_processor_name(processor_name,namelen,ierr)
! processor_name is printed at its full declared length, not trimmed to namelen.
print * ,'hello world !process ',myid,processor_name
! NOTE(review): the subroutine posted with this program is named
! get_gpu_attribute (no trailing "s"); this call uses get_gpu_attributes.
call get_gpu_attributes()
call mpi_finalize(rc)
end program main
makefile :
# Builds `target` from gModule.o and main.o.
# CC compiles .f90 sources and performs the link; NVCC compiles .cuf sources.
CC = mpif90
NVCC =pgfortran

CFLAGS+= -O3

# This appends to the variable LDFLAGS.
LDFLAGS+= -L/usr/local/cuda/lib64 -lcula -lcula_pgfortran -llapack -lblas

NVCCFLAGS= -I /usr/local/cuda/include

all: target
target: gModule.o main.o
# NOTE(review): $(LDFLAGS+) expands a variable named "LDFLAGS+", which is never
# set, so none of the LDFLAGS content reaches this command; $(LDFLAGS) is
# presumably what was intended.
# NOTE(review): recipe lines need a leading tab; the indentation appears to
# have been lost when the file was posted.
$(CC) $(LDFLAGS+) $^ -o $@
%.o : %.cuf
# Same $(LDFLAGS+) reference as in the link rule above.
$(NVCC) $(LDFLAGS+) -o $@ -c $^
%.o: %.f90
# NVCCFLAGS (the CUDA include path) and CFLAGS are passed when compiling .f90 files.
$(CC) $(NVCCFLAGS) $(CFLAGS) -o $@ -c $^
clean:
rm *mod
rm *.o

the result of make :
pgfortran -o gModule.o -c gModule.cuf
mpif90 -I /usr/local/cuda/include -O3 -o main.o -c main.f90
mpif90 gModule.o main.o -o target
gModule.o: In function `get_gpu_attribute':
/g1/u/liuzhe/program/mpi/gModule.cuf:10: undefined reference to `cudagetdevicecount_'
/g1/u/liuzhe/program/mpi/gModule.cuf:14: undefined reference to `cudagetdeviceproperties_'
main.o: In function `main':
/g1/u/liuzhe/program/mpi/main.f90:12: undefined reference to `get_gpu_attributes_'

which pgfortran mpif90
/g1/app/compiler/pgi194/linux86-64-llvm/19.4/bin/pgfortran
/g1/app/compiler/pgi194/linux86-64-llvm/2019/mpi/openmpi-3.1.3/bin/mpif90

You need to add “use cudafor” in the subroutine (or module it’s contained in) so CUDA routine interfaces are used.

There are other errors as well. Given that you “use gModule”, I assume you meant to put the subroutine inside a module named gModule. Also, the subroutine is named “get_gpu_attribute” but you are calling “get_gpu_attributes”.

Here’s the working version:

% cat test.cuf
module gModule
  use cudafor
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

contains

  !> Print the number of CUDA devices visible to this process, followed by a
  !> selection of properties for each device (compute capability,
  !> multiprocessor count, thread limits, global memory and grid/block limits).
  !>
  !> The report goes to standard output.  If a CUDA runtime call returns a
  !> status other than cudaSuccess, a diagnostic is written to standard error;
  !> a failed device-count query is reported as zero devices, and a device
  !> whose properties cannot be read is skipped.
  subroutine get_gpu_attribute()
    type(cudaDeviceProp) :: prop
    integer :: nDevices, i, istat

    ! Assigned at run time rather than in the declaration: an initialiser in
    ! the declaration would give the variable the SAVE attribute.
    nDevices = 0

    istat = cudaGetDeviceCount(nDevices)
    if (istat /= cudaSuccess) then
      write(error_unit, "('cudaGetDeviceCount failed: ',a)") &
        trim(cudaGetErrorString(istat))
      nDevices = 0
    end if
    write(*, "('the gpu num is', i3)") nDevices

    ! CUDA device indices are zero-based.
    do i = 0, nDevices - 1
      write(*, "('device number: ',i0)") i

      istat = cudaGetDeviceProperties(prop, i)
      if (istat /= cudaSuccess) then
        write(error_unit, "('cudaGetDeviceProperties failed for device ',i0,': ',a)") &
          i, trim(cudaGetErrorString(istat))
        cycle
      end if

      write(*, "('Compute Capability :',i0,'.',i0)") prop%major, prop%minor
      write(*, "('number of multiprocessors :',i0)") prop%multiProcessorCount
      write(*, "('Max Threads per Multiprocessors :',i0)") prop%maxThreadsPerMultiprocessor
      ! Integer division: the value is truncated to whole units of 1024**3 bytes.
      write(*, "('Global Memory (GB)',i5)") prop%totalGlobalMem/1024**3
      write(*, "('Execution Configuration Limits')")
      ! Three extents each, printed as AxBxC.
      write(*, "('Max Grid Dims :',2(i0,'x'),i0)") prop%maxGridSize
      write(*, "('Max Block Dims :',2(i0,'x'),i0)") prop%maxThreadsDim
      write(*, "('Max Threads per Block:',i0)") prop%maxThreadsPerBlock
    end do
  end subroutine get_gpu_attribute

end module gModule

!> MPI driver: every rank prints a greeting containing its rank and processor
!> name, then prints the CUDA device report from gModule.
program main
  use mpi
  use cudafor
  use gModule
  implicit none

  character(len=MPI_MAX_PROCESSOR_NAME) :: processor_name
  integer :: myid, numprocs, namelen, ierr

  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world, myid, ierr)
  call mpi_comm_size(mpi_comm_world, numprocs, ierr)
  call mpi_get_processor_name(processor_name, namelen, ierr)

  ! processor_name is printed at its full declared length (blank padded), so
  ! list-directed output may wrap onto a second line.
  print *, 'hello world !process ', myid, processor_name

  call get_gpu_attribute()

  call mpi_finalize(ierr)
end program main

% mpif90 test.cuf; mpirun -np 1 ./a.out
 hello world !process             0
 dev-sky5                                                                                                                                                                                                                                 
the gpu num is  4
device number: 0
Compute Capability :7.0
number of multiprocessors :80
Max Threads per Multiprocessors :2048
Global Memory (GB)   15
Execution Configuration Limits
Max Grid Dims :2147483647x65535x65535
Max Block Dims :1024x1024x64
Max Threads per Block:1024
device number: 1
Compute Capability :7.0
number of multiprocessors :80
Max Threads per Multiprocessors :2048
Global Memory (GB)   15
Execution Configuration Limits
Max Grid Dims :2147483647x65535x65535
Max Block Dims :1024x1024x64
Max Threads per Block:1024
device number: 2
Compute Capability :7.0
number of multiprocessors :80
Max Threads per Multiprocessors :2048
Global Memory (GB)   15
Execution Configuration Limits
Max Grid Dims :2147483647x65535x65535
Max Block Dims :1024x1024x64
Max Threads per Block:1024
device number: 3
Compute Capability :7.0
number of multiprocessors :80
Max Threads per Multiprocessors :2048
Global Memory (GB)   15
Execution Configuration Limits
Max Grid Dims :2147483647x65535x65535
Max Block Dims :1024x1024x64
Max Threads per Block:1024

Hope this helps,
Mat

Thank you very much; this problem troubled me for three days. It works well with your method, but I have a question. When I build with a makefile (shown below), I still get “undefined reference to `cudagetdevicecount_'”, whereas running mpif90 gModule.cuf main.f90 directly works correctly. Please tell me why, so I can solve it for good.
makefile is :
# Builds `target` from gModule.o and main.o, using mpif90 for both the .cuf
# and the .f90 source and for the link.
CC = mpif90
NVCC =mpif90
CFLAGS+= -O3
# LDFLAGS is left empty here, so no CUDA-related option is set for the link.
LDFLAGS+=
NVCCFLAGS= -I /usr/local/cuda/include
all: target
target: gModule.o main.o
# NOTE(review): $(LDFLAGS+) expands a variable named "LDFLAGS+", which is never
# set; anything added to LDFLAGS will not reach this command unless the
# reference is written $(LDFLAGS).
# NOTE(review): recipe lines need a leading tab; the indentation appears to
# have been lost when the file was posted.
$(CC) $(LDFLAGS+) $^ -o $@
gModule.o : gModule.cuf
$(NVCC) -c $^
main.o: main.f90
$(CC) $(NVCCFLAGS) $(CFLAGS) -c $^
clean:
rm *mod
rm *.o

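Add the “-Mcuda” flag to your LDFLAGS. Also note that your link rule references “$(LDFLAGS+)”, which is a different (empty) variable from LDFLAGS; change it to “$(LDFLAGS)”, otherwise nothing you put in LDFLAGS is passed to the link command.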

For files with the “.cuf” extension, “-Mcuda” (i.e. enable CUDA Fortran) is implied. However for other file extensions, like “.o”, the compiler doesn’t know you’re using CUDA Fortran so isn’t adding the CUDA Fortran runtime libraries.

1 Like

Thank you very much. I have benefited a lot from your answer, and I hope you can give me more help when you have time.