Hello every one
I was using CUDA C till now. My work is shift monte corlo software onto GPU. since the code is in fortran, i thought it would be nice to use CUDA FORTRAN developed by PGI and NVIDIA.
i read the programming guide for CUDA fortran by PGI group.
I stared with the sample programs to get used to the languge but I am getting same errors for the all the programs which I have tried till now.
I am not able to figure out what is the error.
Please help me out to fix this problem otherwise I will not able to proceed further in my project.
I am giving the vector addition example which I have written, please look into the code and tell what is problem.
Please tell me.
1 module vectoraddition_module
2 use cudafor
3 integer, PARAMETER :: BLOCKSIZE = 16
4 contains
5
6 attributes(global) subroutine vectoraddition_kernel( DeviceVector_A, DeviceVector_B, Device_ResultVector, VectorElements )
7 real :: DeviceVector_A( VectorElements ), DeviceVector_B( VectorElements ), Device_ResultVector( VectorElements )
8 integer, value :: VectorElements
9 integer :: tidx, tidy, threadindex, threadcolumnindex, maximumthread
10
11 tidx = threadidx%x
12 tidy = threadidx%y
13 threadindex = tidx * BLOCKSIZE + tidy
14 maximumthread = BLOCKSIZE * BLOCKSIZE
15
16
17 if (threadindex < VectorElements) Device_ResultVector(threadindex) = DeviceVector_A(threadindex) + DeviceVector_B(threadindex)
19
20
21
22 call syncthreads()
23 end subroutine vectoraddition_kernel
24
25 attributes(host) subroutine vectoraddition( VectorA, VectorB, ResultVector, VectorElements )
26 real :: VectorA( VectorElements ), VectorB( VectorElements ), ResultVector( VectorElements )
27 integer, value :: VectorElements
28 real, allocatable, device :: DeviceVectorA(:), DeviceVectorB(:), DeviceResultVector(:)
29 type(dim3) :: dimGrid, dimBlock
30 integer :: istat
31
32 istat = cudaMalloc( DeviceVectorA, VectorElements )
33 istat = cudaMalloc( DeviceVectorB, VectorElements )
34 istat = cudaMalloc( DeviceResultVector, VectorElements )
35
36 istat = cudaMemcpy( DeviceVectorA, VectorA, VectorElements, cudaMemcpyHostToDevice )
37 istat = cudaMemcpy( DeviceVectorB, VectorB, VectorElements, cudaMemcpyHostToDevice )
38
39 dimGrid = dim3( 1, 1, 1 )
40 dimblock = dim3( BLOCKSIZE, BLOCKSIZE, 1 )
41
42 call vectoraddition_kernel<<<dimGrid, dimBlock>>>( DeviceVectorA, DeviceVectorB, DeviceResultVector, VectorElements )
43
44 istat = cudaMemcpy(ResultVector, DeviceResultVector, VectorElements, cudaMemcpyDeviceToHost)
45
46 istat = cudaFree(DeviceVectorA)
47 istat = cudaFree(DeviceVectorB)
48 istat = cudaFree(DeviceResultVector)
49 end subroutine vectoraddition
50
51 end module vectoraddition_module
52
53 program vectoraddition_program
54 use vectoraddition_module
55 IMPLICIT NONE
56 real, allocatable :: VectorA(:), VectorB(:), ResultVector(:)
57 integer :: VectorElements, index
58
59 VectorElements = BLOCKSIZE * BLOCKSIZE
60
61 allocate(VectorA(VectorElements))
62 allocate(VectorB(VectorElements))
63 allocate(ResultVector(VectorElements))
64
65 do index = 1,VectorElements
66 VectorA(Index) = 1.0
67 VectorB(index) = 1.0
68 ResultVector(index) = 0.0
69 end do
70
71 call vectoraddition(VectorA, VectorB, ResultVector, VectorElements)
72
73 do index = 1,VectorElements
74 write(*)ResultVector(index)
75 end do
76
77 deallocate(VectorA)
78 deallocate(VectorB)
79 deallocate(ResultVector)
80
81 end program vectoraddition_program
When I am compiling I am getting these errors
PGF90-S-0188-Argument number 1 to vectoraddition_kernel: type mismatch (VectorAdditionCudaFortran.cuf: 42)
PGF90-S-0188-Argument number 2 to vectoraddition_kernel: type mismatch (VectorAdditionCudaFortran.cuf: 42)
PGF90-S-0188-Argument number 3 to vectoraddition_kernel: type mismatch (VectorAdditionCudaFortran.cuf: 42)
0 inform, 0 warnings, 3 severes, 0 fatal for vectoraddition
Same mismatch errors are coming for other programs which I have written.
Please tell me where I have gone wrong. This is very important to me.
I will be very grateful to you if you guide me to fix this problem.
Thank you
With love and Regards
Praveen