I’ve been looking at modifying the CUFFT example in /local/software/pgi/11.9/linux86-64/2011/cuda/cudaFortranSDK/. Whilst I can get the original version to compile with no issues I am getting the following complaints at compilation time for a slightly modified version:
[kaw2e11@UOS-205126 isolated_modified_cufft]$ make
pgf90 -fast -c precision_m.cuf
pgf90 -fast -c cufft_m.cuf
pgf90 -Mcuda=3.2 -fast -c fourier_gpu_m.F90
pgf90 -Mcuda=3.2 -fast -o cufftTest cufftTest.F90 fourier_gpu_m.o precision_m.o -lcufft
cufftTest.F90:
fourier_gpu_m.o: In function `fourier_gpu_m_fourier_gpu_':
/home/kaw2e11/PROJECTS/GPU/cudaFortranSDK/isolated_modified_cufft/./fourier_gpu_m.F90:18: undefined reference to `cufftexec_'
/home/kaw2e11/PROJECTS/GPU/cudaFortranSDK/isolated_modified_cufft/./fourier_gpu_m.F90:18: undefined reference to `cufftexec_'
fourier_gpu_m.o: In function `.C2_283':
fourier_gpu_m.F90:(.data+0x14): undefined reference to `cufft_m_'
make: *** [cufftTest] Error 2
program cufftTest
use precision_m
use fourier_gpu_m
implicit none
complex(fp_kind), allocatable :: a(:),b(:)
integer i
integer :: n=8
! allocate arrays on the host
allocate(a(n), b(n))
!initialize arrays on host
do i = 1, n
a(i) = cmplx(cos((i-1) * atan2(0.0,-1.0) / n), 0.0)
end do
! Print initial array
print *, "Array A:"
write (*,"(8('(',f6.3,',',f6.3,')',1x))") a
call fourier_gpu(n,a,b)
! Copy results back to host
print *, "Inverse B"
write (*,"(8('(',f6.3,',',f6.3,')',1x))") b
! Scale
b = b / n
print *, "Scaled B"
write (*,"(8('(',f6.3,',',f6.3,')',1x))") b
!release memory on the host
deallocate(a, b)
end program cufftTest
module fourier_gpu_m
public fourier_gpu
contains
subroutine fourier_gpu(n, a, b)
use precision_m
use cufft_m
implicit none
integer n
complex(fp_kind) :: a(n),b(n)
complex(fp_kind), device, allocatable :: a_d(:), b_d(:)
integer :: plan, planType
! Allocate arrays on the device
allocate(a_d(n), b_d(n))
! Copy a array to device
a_d = a
! Set planType to either single or double precision
if (fp_kind == singlePrecision) then
planType = CUFFT_C2C
else
planType = CUFFT_Z2Z
endif
! Initialize the plan and execute the FFTs.
call cufftPlan1D(plan,n,planType,1)
call cufftExec(plan,planType,a_d,b_d,CUFFT_FORWARD)
call cufftExec(plan,planType,b_d,b_d,CUFFT_INVERSE)
! Copy results back to host
b = b_d
! Release memory on the device
deallocate(a_d, b_d)
! Destroy the plan
call cufftDestroy(plan)
end subroutine fourier_gpu
end module fourier_gpu_m
# All these examples can run with various pgfortran options. -fast is fine.
F90FLAGS = -fast
OBJS = cufftTest
all: $(OBJS)
# cufftTest
cufftTest: cufftTest.F90 fourier_gpu_m.o precision_m.o
pgf90 -Mcuda=3.2 $(CUDAFLAGS) $(F90FLAGS) -o $@ $^ -lcufft
fourier_gpu_m.o: fourier_gpu_m.F90 cufft_m.o precision_m.o
pgf90 -Mcuda=3.2 $(CUDAFLAGS) $(F90FLAGS) -c $<
cufft_m.o: cufft_m.cuf precision_m.o
pgf90 $(CUDAFLAGS) $(F90FLAGS) -c $<
# auxiliary modules
precision_m.o: precision_m.cuf
pgf90 $(CUDAFLAGS) $(F90FLAGS) -c $<
#Clean up
clean:
rm -rf *.o *.mod $(OBJS) *~
Any suggestions?
Cheers,
Karl