Some prob about openACC

When I use openACC to accelerate my program the following errors arouse, what do these error mean?

call to cuMemcpyDtoHAsync returned error 719: Launch failed (often invalid pointer dereference)

call to cuMemFreeHost returned error 719: Launch failed (often invalid pointer dereference)

These are somewhat generic errors meaning that the data copy and freeing of managed memory failed.

Can you please give more details and possibly an example of where where failure occurs?

The following is the code:

    module Global_Variable
		use openacc
        implicit none
		
        real*8,parameter :: PI=3.14159265358979d0

        !dimensions of the flame, in mm
        real*8,parameter :: Lx=120.0d0
        real*8,parameter :: Ly=50.0d0
        real*8,parameter :: Lz=50.0d0

        !discretization of the flame
        integer,parameter :: Nx=120   !should be an even #
        integer,parameter :: Ny=120   !should be an even #
        integer,parameter :: Nz=120  !should be an even #
        !integer,parameter :: Nx=10   !should be an even #
        !integer,parameter :: Ny=10   !should be an even #
        !integer,parameter :: Nz=10  !should be an even #
        !integer,parameter :: Nx=60   !should be an even #
        !integer,parameter :: Ny=60   !should be an even #
        !integer,parameter :: Nz=60  !should be an even #

        !increment in x,y,z directions, in microns
        real*8,parameter :: dx=Lx/real(Nx,8)
        real*8,parameter :: dy=Ly/real(Ny,8)
        real*8,parameter :: dz=Lz/real(Nz,8)

        integer,parameter :: CCDNX=1024 !# of pixels in the x direction,!should be an even #
        integer,parameter :: CCDNY=1024 !# of pixels in the y direction,!should be an even #
        real*8,parameter :: CCDpixelSize=0.02d0   !size of each CCD pixel
        !real*8,parameter :: Fnumber=8.0d0

        integer,parameter :: MaxNofVoxelContri=2000   !maximum # of votex contributes to a CCD pixel
        integer,parameter :: TotalNumOfViews=8   !total # of views measured in the exp.
        integer,parameter :: NumOfViewsUsed=8   !# of view used
        integer,parameter :: NumOfFrame=20   !# of view used
        integer,parameter :: SkipOfFrame=1   !# of view used  

        !real*8,parameter :: noiselevel=0.05d0
        real*8,parameter :: Terminallevel=1.0d-2
        real*8,parameter :: beta=0.07d0
        integer,parameter :: MaxNumPhoton=10000
        integer,parameter :: Niteration=50

        real*8, allocatable :: CCDWeightList(:,:,:,:)   !weight of voxel contribute to a pixel on CCD
        real*8, allocatable :: CCDWeightSquare(:,:,:)
        integer, allocatable :: CCDNumOfVoxelContri(:,:,:)              !# of voxels contribute to a pixel on CCD
        integer, allocatable :: CCDVoxelList(:,:,:,:,:) !a list of voxels contribute to a pixel on CCD

        real*8 :: Rc(3,3,NumOfViewsUsed)
        real*8 :: Tc(3,1,NumOfViewsUsed)
        real*8 :: kc(5)
        real*8 :: alpha_c
        real*8 :: fc(2)
        real*8 :: cc(2)

        integer :: iframe=0       ! the index of the image also can be named as the ith iframe

        real*8 :: CCDTrueData(CCDNX,CCDNY,NumOfViewsUsed)
        real*8 :: CCDRecData(CCDNX,CCDNY)    !updated for each view
        real*8 :: CCDPhotons(CCDNX,CCDNY)

        real*8 :: Flame3DMatrixCurr(Nx,Ny,Nz)
        real*8 :: Flame3DMatrixTrue(Nx,Ny,Nz)
        real*8 :: t1,t2,t3,t4


contains

        subroutine initilize(CCDWeightList,CCDWeightSquare,CCDNumOfVoxelContri,CCDVoxelList)

            real*8, allocatable :: CCDWeightList(:,:,:,:)   !weight of voxel contribute to a pixel on CCD
            real*8, allocatable :: CCDWeightSquare(:,:,:)
            integer, allocatable :: CCDNumOfVoxelContri(:,:,:)              !# of voxels contribute to a pixel on CCD
            integer, allocatable :: CCDVoxelList(:,:,:,:,:) !a list of voxels contribute to a pixel on CCD

            integer :: iviews
            integer :: i,j,k
            !type(view),allocatable :: views(:)
            real*8 :: LocationWord(3,1)
            real*8 :: LocationCamera(3,1)
            integer :: iphoton
            real*8:: rnd(3)
            real*8 :: xw,yw,zw,xc,yc,zc,xd1,xd2,xn1,xn2,xp,yp,r_calib_2,dx1_cali,dx2_cali
            integer :: maxi,mini,maxk,mink,tempi
            integer :: ii,kk
            integer :: index1,index2
            real*8 :: NumPhoton
            real*8 :: rconfusion
            real*8:: rsquare,midyu
			integer(kind=8) :: random
			real*8, allocatable :: temp1(:,:,:,:,:,:), temp2(:,:,:,:,:)
			allocate(temp1(NumOfViewsUsed,Nx,Ny,Nz,10,10))
			allocate(temp2(NumOfViewsUsed,Nx,Ny,Nz,4))
			random=1
			
            !call inputdata()   !input CCD data from files
        !
			CCDNumOfVoxelContri(:,:,:)=0
			
			
            do iviews=1,NumOfViewsUsed
				!write(*,*) iviews
                !CCDNumOfVoxelContri(iviews,:,:)=0
                do j=1,Ny
					!write(*,*) j
					!$acc kernels
                    do i=1,Nx
						!write(*,"('i=',i0)") i
                        do k=1,Nz
							!write(*,"('k=',i0)") k
                            mini=2*CCDNX;maxi=-2*CCDNX
                            mink=2*CCDNY;maxk=-2*CCDNY
                            NumPhoton=0.0d0
                            CCDPhotons=0.0d0
							!$ acc loop
                            do iphoton=1,MaxNumPhoton
									!write(*,"('iphoton=',i0)") iphoton

                                    !call random_number(rnd)
									random = random*25214903917 + 11
									rnd(1) = mod(random,65536)/real(65536)
									random = random*25214903917 + 11
									rnd(2) = mod(random,65536)/real(65536)
									random = random*25214903917 + 11
									rnd(3) = mod(random,65536)/real(65536)
                                    xw=dx*real(i-30.0d0/(Lx/Nx)-1,8)+0.5d0*dx+dx*(rnd(1)-0.5d0)
                                    yw=dy*real(j+14.0d0/(Ly/Ny)-1,8)+0.5d0*dy+dy*(rnd(2)-0.5d0)
                                    zw=dz*real(k-30.0d0/(Lz/Nz)-1,8)+0.5d0*dz+dz*(rnd(3)-0.5d0)
                                    rsquare=(xw+Tc(1,1,iviews))**2+(yw+Tc(2,1,iviews))**2+(zw+Tc(3,1,iviews))**2
                                    !
                                    LocationWord(1,1)=xw
                                    LocationWord(2,1)=yw
                                    LocationWord(3,1)=zw
                                    !
                                    LocationCamera=matmul(Rc(:,:,iviews),LocationWord)+Tc(:,:,iviews)
                                    xc=LocationCamera(1,1)
                                    yc=LocationCamera(2,1)
                                    zc=LocationCamera(3,1)

                                    !ObjectDist=zc
                                    !ImageDist=(fc(1)+fc(2))/2*ObjectDist/(ObjectDist-(fc(1)+fc(2))/2)
                                    !magnification=ImageDist/ObjectDist

        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                                    xc=(fc(1)+fc(2))*CCDpixelSize/2*xc/(zc-(fc(1)+fc(2))*CCDpixelSize/2)       
                                    yc=(fc(1)+fc(2))*CCDpixelSize/2*yc/(zc-(fc(1)+fc(2))*CCDpixelSize/2)
                                    zc=(fc(1)+fc(2))*CCDpixelSize/2*zc/(zc-(fc(1)+fc(2))*CCDpixelSize/2)
                                                                                   

                                    !xc=xc*ZZ0*CCDpixelSize
                                    !yc=yc*ZZ0*CCDpixelSize
                                    xn1=xc/zc                                 
                                    xn2=yc/zc
                                    r_calib_2=xn1**2+xn2**2

                                    dx1_cali=2*kc(3)*xn1*xn2+kc(4)*(r_calib_2+2*xn1*xn1)
                                    dx2_cali=kc(3)*(r_calib_2+2*xn2*xn2)+2*kc(4)*xn1*xn2

                                    xd1=(1+kc(1)*r_calib_2+kc(2)*r_calib_2**2+kc(5)*r_calib_2**3)*xn1+dx1_cali
                                    xd2=(1+kc(1)*r_calib_2+kc(2)*r_calib_2**2+kc(5)*r_calib_2**3)*xn2+dx2_cali

                                    xp=fc(1)*(xd1+alpha_c*xd2)+cc(1)          
                                    yp=fc(2)*xd2+cc(2)                        

                                    if(xp>=1 .and. xp<=CCDNX .and. yp>=1 .and. yp<=CCDNY) then
                                        index1=ceiling(xp)
                                        index2=ceiling(yp)
                                        if(mini>index1) mini=index1
                                        if(maxi<index1) maxi=index1

                                        if(mink>index2) mink=index2
                                        if(maxk<index2) maxk=index2

                                        NumPhoton=NumPhoton+1.0d0
                                        CCDPhotons(index1,index2)=CCDPhotons(index1,index2)+1.0d0
                                    end if

                            end do
							!write(*,"('iphoton=',i0)") iphoton
							
                            CCDPhotons(mini:maxi,mink:maxk)=CCDPhotons(mini:maxi,mink:maxk)/NumPhoton
							!write(*,*) 'divide succ'
							
							temp1(iviews,j,i,k,1:maxi-mini+1,1:maxk-mink+1) = CCDPhotons(mini:maxi,mink:maxk)
							temp2(iviews,j,i,k,1) = mini
							temp2(iviews,j,i,k,2) = maxi
							temp2(iviews,j,i,k,3) = mink
							temp2(iviews,j,i,k,4) = maxk
                            
                            CCDPhotons(mini:maxi,mink:maxk)=0.0d0
                        end do

                    end do
					!$acc end kernels
                end do
            end do
			
			do iviews=1,NumOfViewsUsed
				!write(*,*) iviews
                !CCDNumOfVoxelContri(iviews,:,:)=0
                do j=1,Ny
					!write(*,*) j
                    do i=1,Nx
						!write(*,"('i=',i0)") i
                        do k=1,Nz
                            midyu=maxval(CCDPhotons(mini:maxi,mink:maxk))
                            midyu=minval(CCDPhotons(mini:maxi,mink:maxk))
                            do ii=temp2(iviews,j,i,k,1),temp2(iviews,j,i,k,2)
								!write(*,"('ii=',i0)") ii
                                do kk=temp2(iviews,j,i,k,3),temp2(iviews,j,i,k,4)
									!write(*,"('kk=',i0)") kk
                                    if(temp1(iviews,j,i,k,ii-mini+1,kk-mink+1)>1.0d-4) then

                                            CCDNumOfVoxelContri(iviews,ii,kk)=CCDNumOfVoxelContri(iviews,ii,kk)+1
											!write(*,*) 'succ plus 1'
                                            tempi=CCDNumOfVoxelContri(iviews,ii,kk)
											!write(*,*) tempi
											!write(*,*) CCDPhotons(ii,kk)/rsquare*1.0d8
											!write(*,*) CCDWeightList(iviews,ii,kk,tempi)

                                            CCDWeightList(iviews,ii,kk,tempi)=temp1(iviews,j,i,k,ii-mini+1,kk-mink+1)/rsquare*1.0d8
											!CCDWeightList(iviews,ii,kk,tempi)= 9
											!write(*,*) 'succ to CCDWeightList'
                                            midyu=CCDWeightList(iviews,ii,kk,tempi)
											!write(*,*) 'succ to midyu'
                                            CCDVoxelList(iviews,ii,kk,tempi,1)=i
                                            CCDVoxelList(iviews,ii,kk,tempi,2)=j
                                            CCDVoxelList(iviews,ii,kk,tempi,3)=k

                                    end if
                                end do
                            end do
                        end do

                    end do
                end do
            end do
			

        end subroutine initilize
    end module

program ThreeD_CTC_Sub_Main

    use Global_Variable
    implicit none
    
    integer (kind=4), dimension(8) :: time=0 
        
        
    call date_and_time(values=time)
    write(*, FMT=11) time(1:3), time(5:7)
    write(999, FMT=11) time(1:3), time(5:7)
    call cpu_time(t1)
        
    allocate(CCDWeightList(NumOfViewsUsed,CCDNX,CCDNY,MaxNofVoxelContri))   !weight of voxel contribute to a pixel on CCD
    allocate(CCDWeightSquare(NumOfViewsUsed,CCDNX,CCDNY))   
    allocate(CCDNumOfVoxelContri(NumOfViewsUsed,CCDNX,CCDNY))              !# of voxels contribute to a pixel on CCD
    allocate(CCDVoxelList(NumOfViewsUsed,CCDNX,CCDNY,MaxNofVoxelContri,3)) !a list of voxels contribute to a pixel on CCD
    
    
    !CCDWeightList(:,:,:,:)=0
    !CCDWeightSquare(:,:,:)=0
    CCDNumOfVoxelContri(:,:,:)=0
    !CCDVoxelList(:,:,:,:,:)=0
  
    call initilize(CCDWeightList,CCDWeightSquare,CCDNumOfVoxelContri,CCDVoxelList)
    
    deallocate(CCDWeightList)
    deallocate(CCDWeightSquare)
    deallocate(CCDNumOfVoxelContri)
    deallocate(CCDVoxelList)
    
    pause
end program ThreeD_CTC_Sub_Main

Thanks!