Hello
I have been getting NaN values from a CUDA Fortran subroutine that calculates a matrix. The positions of the NaN values inside the matrix change with each rerun, and entries that were NaN in one run hold the correct values in another.
I cannot get rid of this error; could you please advise?
The name of the array is ‘Km’ and it is being calculated in parallel, while in each thread there is a small loop done.
Could you please have a look at it?
module stiff
  use cudafor
  implicit none
contains

  !> Device kernel: Km(:,:,i) = (EA(i) / Le(i)) * C(:,:,i) for one index i per thread.
  !!
  !! Km : output array, 4 x 4 x m
  !! EA : input array, length m
  !! Le : input array, length m
  !! C  : input array, 4 x 4 x m
  !! m  : extent of the last dimension of Km and C (passed by value)
  attributes(global) subroutine TRstif(Km, EA, Le, C, m)
    integer, value :: m
    real, intent(in)    :: EA(m), Le(m), C(4,4,m)
    real, intent(inout) :: Km(4,4,m)
    integer :: i, row, col
    real :: factor

    ! Global 1-based index of this thread; blockDim%x replaces the hard-coded
    ! 1024 so the kernel stays correct for any block size.
    i = threadIdx%x + (blockIdx%x - 1) * blockDim%x

    ! The grid is rounded up to whole blocks, so threads of the last block
    ! may fall past m. They must not touch memory.
    if (i > m) return

    factor = EA(i) / Le(i)

    ! Innermost loop runs over the first (fastest-varying) index.
    do col = 1, 4
      do row = 1, 4
        Km(row, col, i) = factor * C(row, col, i)
      end do
    end do
    ! No syncthreads() here: threads do not exchange data, and nothing
    ! follows the loop that would need a barrier.
  end subroutine TRstif

end module stiff
program tstiff
use stiff
implicit none
! Host-side work arrays.
doubleprecision INDX(1200)
doubleprecision A(1000,1000)
doubleprecision X(1000,1000)
doubleprecision,allocatable:: k(:,:),theta(:)
doubleprecision,allocatable:: Le(:) , D2(:,:) ,D3(:,:)
doubleprecision,allocatable:: loadmin(:) ,U(:),Utotal(:),Km(:,:,:)
doubleprecision,allocatable:: mc(:,:) ,D1(:) ,EA(:),load(:)
doubleprecision,allocatable:: Um(:,:),Fm(:),C(:,:,:),Ka(:,:,:)
integer:: i ,nj ,m ,z ,s,w,g, min, n9, n, j
real::u1,u2,u3
integer ::c1,c2,cr,cm, istat
! Device copies used by the kernel.
! NOTE(review): these are default real while the host arrays EA, Le, C and Km
! are double precision -- confirm that the host<->device assignments below
! convert between the two kinds as intended, or give both sides the same kind.
real , device, allocatable, dimension(:) :: EA_d,le_d
real, device, allocatable, dimension(:,:,:)::Km_d,C_d
REAL :: rate
.
.
.
.
.
! Host -> device transfers of the kernel inputs; the output is zeroed first.
C_d=C
EA_d=EA
Le_d=Le
Km_d=0
! Number of blocks = ceil(m/1024), computed in integer arithmetic.
! ceiling(real(m/1024)) divided as integers first, so the result was already
! truncated: a partial last block was never launched, and m < 1024 asked for
! zero blocks. Surplus threads of the last block are rejected inside TRstif.
call TRstif<<< (m + 1023)/1024, dim3(1024,1,1) >>>(Km_d,EA_d,Le_d,C_d,m)
istat = cudaThreadSynchronize ()
! Device -> host transfer of the result.
Km=Km_d
write(99,*) Km
I have tried synchronizing the threads, but that does not seem to be the issue. When I print the results, NaN values keep appearing, although in one run, by chance, all the values came out correct.
Any ideas?
Thank you for your time.
Ahmed