CUDA: copyout Memcpy FAILED: 4(unspecified launch failure)

Hi all,

When running a CUDA fortran test I had the following error:

0: copyout Memcpy (host=0x7fdb27b674c0, dev=0xb00300000, size=1053928) FAILED: 4(unspecified launch failure)

Here is my code:


module declare

   implicit none

   integer fanum,falen,nfa,maxfalen,n

   integer,allocatable,dimension(:)::afilen
   integer,allocatable,dimension(:,:)::afil

   double precision k_a,l_a

   double precision,allocatable,dimension(:)::xfa,yfa,zfa
   double precision,allocatable,dimension(:)::fxbondfa,fybondfa,fzbondfa


   real,allocatable,dimension(:)::fxw,fyw,fzw

   double precision zero

!  for device:

   double precision,allocatable,device::fxbondfa_d(:),fybondfa_d(:),fzbondfa_d(:)
   double precision,allocatable,device::fxanglfa_d(:),fyanglfa_d(:),fzanglfa_d(:)
   double precision,allocatable,device::xfa_d(:),yfa_d(:),zfa_d(:)

   integer,allocatable,device::afilen_d(:),atyp_d(:),afil_d(:,:)

   integer istat
end module

!------------------------------------------------
                       
program test

use declare
use cudafor
use dev

implicit none

   k_a=1000.0d0 ! unit = 1e-20 J/nm**2
   l_a=5.4d0    ! 

   call getinfo(fanum,maxfalen,nfa)

   allocate(xfa(nfa),yfa(nfa),zfa(nfa),afil(maxfalen,fanum),afilen(fanum))

   call ringin(fanum,maxfalen,afil,afilen,nfa,xfa,yfa,zfa)

   allocate(fxbondfa(nfa),fybondfa(nfa),fzbondfa(nfa))

      allocate(fxw(nfa),fyw(nfa),fzw(nfa))

      allocate(fxbondfa_d(nfa),fybondfa_d(nfa),fzbondfa_d(nfa))

      allocate(xfa_d(nfa),yfa_d(nfa),zfa_d(nfa))
      xfa_d=xfa; yfa_d=yfa; zfa_d=zfa


      allocate(afil_d(maxfalen,fanum),afilen_d(fanum))

      do n=1,fanum
         afil_d(:,n)=afil(:,n)
      end do

      afilen_d=afilen

   istat = cudaSetDevice(0)

   zero=0.0d0

      call force_d<<<(fanum-1)/512+1,512>>>(fxbondfa_d,fybondfa_d,fzbondfa_d,fanum, &
           afil_d,afilen_d,xfa_d,yfa_d,zfa_d,k_a,l_a,zero)

      fxw=fxbondfa_d
      fyw=fybondfa_d
      fzw=fzbondfa_d

end


!------------------------------------------------------------------------------

module dev

contains

   subroutine getinfo(fanum,maxfalen,nfa)

   implicit none

   integer fanum,maxfalen,nfa
   character (len=64) fileconf,chara


      fileconf='rconf000.inp'

   open(1,file=fileconf)


3  read(1,*)chara

   if(chara(1:4)/='FACT')then
      goto 3
   end if

   read(1,*)nfa,fanum,maxfalen



   close(1)

   end subroutine

!=========================================================

   subroutine ringin(fanum,maxfalen,afil,afilen,nfa,xfa,yfa,zfa)


   implicit none

   integer,value::fanum,nfa
   integer,value::maxfalen
   integer n

   integer,allocatable,dimension(:)::afilen
   integer,allocatable,dimension(:,:)::afil
   double precision,allocatable,dimension(:)::xfa,yfa,zfa

   character (len=64) filecoor,fileconf,chara

      fileconf='rconf000.inp'
      filecoor='rcoor000.inp'
!  read coordinates

   open(1,file=filecoor,form='unformatted')
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)
   read(1)

   read(1)xfa(1:nfa)
   read(1)yfa(1:nfa)
   read(1)zfa(1:nfa)

   close(1)

   open(1,file=fileconf)


3  read(1,*)chara

   if(chara(1:4)/='FACT')then
      goto 3
   end if

   read(1,*)
   read(1,*)
   read(1,*)
   read(1,*)
   read(1,*)
   do n=1,fanum
      read(1,*)afilen(n),afil(1:afilen(n),n)
   enddo

   close(1)

   end subroutine

!=========================================================

attributes(global) &

   subroutine force_d(fxbond,fybond,fzbond,fanum, &
                 afil,afilen,xfa,yfa,zfa,k_a,l_a,zero)

   implicit none

   integer,value:: fanum
   integer nf,jf,n0,n1,n2,n3,tid,bid

   integer,device::afilen(:)
   integer,device::afil(:,:)

   double precision,value::k_a,l_a,zero
   double precision dx,dy,dz,dx1,dy1,dz1,invdist,f,dist1,fx,fy,fz

   double precision,device::xfa(:),yfa(:),zfa(:)
   double precision,device::fxbond(:),fybond(:),fzbond(:)

tid=threadidx%x
bid=blockidx%x

   n1=afil(1,nf)
   fxbond(n1)=0.0d0
   fybond(n1)=0.0d0
   fzbond(n1)=0.0d0

      do jf=2,afilen(nf)

         n2=afil(jf,nf)

         n1=afil(jf-1,nf)

         dx1=xfa(n1)-xfa(n2)
         dy1=yfa(n1)-yfa(n2)
         dz1=zfa(n1)-zfa(n2)

         dist1=sqrt(dx1*dx1+dy1*dy1+dz1*dz1)

         invdist=1.0d0/dist1

         f=k_a*(dist1-l_a)

         fx=f*dx1*invdist
         fy=f*dy1*invdist
         fz=f*dz1*invdist

         fxbond(n1)=fxbond(n1)-fx
         fybond(n1)=fybond(n1)-fy
         fzbond(n1)=fzbond(n1)-fz

         fxbond(n2)=fx
         fybond(n2)=fy
         fzbond(n2)=fz


      end do


   end subroutine

end module

Can someone please tell me what I did wrong?

There are 2 input files, but I don’t see where I can attach them here.

Thanks,

Lam

Hi Lam,

You have an un-initialized variable “nf” being used as an index.

Hope this helps,
Mat