That’s worked a treat, cheers guys.
But, as programming tends to go, with one problem solved, another has appeared…
I really don’t have a clue what it doesn’t like about this code:
! Module holding the GPU-side work arrays for formd_cuda.
! Every array declared here carries the "allocatable" and "device"
! attributes; formd_cuda (below) allocates each of them at run time.
Module Acceler_formd
! cudafor supplies the CUDA Fortran definitions used further down
! (type(dim3), cudaDeviceSetCacheConfig, cudaFuncCachePreferL1).
USE cudafor
implicit none
! Compile-time extents. DEFMAXATM / DEFMAXELMNT are not defined in the
! visible source -- presumably preprocessor macros; TODO confirm that the
! file is actually run through the preprocessor so they are substituted.
! NOTE(review): this PARAMETER statement appears before the
! "integer::maxatm,maxelmnt" declaration at the end of this section while
! "implicit none" is in effect; standard Fortran expects the type of a
! named constant to be specified before the PARAMETER statement -- confirm
! the compiler really treats maxatm/maxelmnt as integer constants here.
parameter(maxatm=DEFMAXATM,maxelmnt=DEFMAXELMNT)
! GPU specific declarations
! Allocated in formd_cuda as ian_d(maxatm).
integer,allocatable,device,dimension(:)::ian_d
! Allocated in formd_cuda as natorb_d(maxelmnt).
integer,allocatable,device,dimension(:)::natorb_d
! Allocated in formd_cuda as lowlim_d(natoms).
integer,allocatable,device,dimension(:)::lowlim_d
! Both allocated in formd_cuda with extent lowt.
double precision,allocatable,device,dimension(:)::globdens,ftot_d
! Both allocated in formd_cuda with shape (subsystems,2).
integer,allocatable,device,dimension(:,:)::totsubsys_d,
& coresubsys_d
! Allocated in formd_cuda as subbasis_d(subsystems).
integer,allocatable,device,dimension(:)::subbasis_d
! All three allocated in formd_cuda with shape (subsystems,maxbasfun).
double precision,allocatable,device,dimension(:,:)::B,
& subeval,subnelec
! All four allocated in formd_cuda with shape
! (subsystems,maxbasfun,maxbasfun).
double precision,allocatable,device,dimension(:,:,:)::coeff,
& subevec,subscr1,subdens
! Type declaration for the two named constants set by the PARAMETER
! statement above.
integer::maxatm,maxelmnt
CONTAINS
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
subroutine formd_cuda(ftot_h,dtot,lowt,natoms,ian_h,natorb_h,
& lowlim_h,itr,nbasis,ifact_h,nelecs,maxbasfun,
& subsystems,subnelec_h,subbasis_h,totsubsys_h,coresubsys_h)
implicit none
integer,dimension(maxatm)::ian_h
integer,dimension(maxelmnt)::natorb_h
integer,dimension(natoms)::lowlim_h
integer,dimension(nbasis)::ifact_h
double precision, dimension(lowt)::ftot_h,dtot
integer::subsystems,cresidues,bresidues,x,y,I,xend,
& xj,xk,maxbasfun
double precision::temp
integer,dimension(subsystems,2)::totsubsys_h,coresubsys_h
integer,dimension(subsystems)::subbasis_h
double precision,dimension(subsystems,maxbasfun)::B_h,subeval_h,
& subnelec_h
integer::lowt,natoms,itr,counter,nbasis,nelecs,
& j,llk,ij,callno
double precision::ef
! GPU specific declarations
integer:: nthreads,blocksize,threadblocks,istat,cuError
type(dim3)::dimGrid,dimBlock
character*120 errmsg
! Set device to prefer L1 cache to shared memory
istat=cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)
write(*,*)maxatm,maxelmnt,natoms,lowt,subsystems,maxbasfun
write(*,*)"1"
allocate(ian_d(maxatm))
write(*,*)"2"
allocate(natorb_d(maxelmnt))
write(*,*)"3"
allocate(lowlim_d(natoms))
write(*,*)"4"
allocate(globdens(lowt))
write(*,*)"5"
allocate(ftot_d(lowt))
write(*,*)"6"
allocate(totsubsys_d(subsystems,2))
write(*,*)"7"
allocate(coresubsys_d(subsystems,2))
write(*,*)"8"
allocate(subbasis_d(subsystems))
write(*,*)"9"
allocate(B(subsystems,maxbasfun))
write(*,*)"10"
allocate(subeval(subsystems,maxbasfun))
write(*,*)"11"
allocate(subnelec(subsystems,maxbasfun))
write(*,*)"12"
allocate(subevec(subsystems,maxbasfun,maxbasfun))
write(*,*)"13"
allocate(coeff(subsystems,maxbasfun,maxbasfun))
write(*,*)"14"
allocate(subscr1(subsystems,maxbasfun,maxbasfun))
write(*,*)"15"
allocate(subdens(subsystems,maxbasfun,maxbasfun))
write(*,*)"16"
.
. .
. .
. .
. .
. .
.
I’m getting the runtime error:
0: ALLOCATE: copyin Symbol Memcpy FAILED:11(invalid argument)
As far as I can see, I’ve declared the correct arrays as device, allocatable, and I’ve checked that all the extent scalars have values. When I run it with the “write” statements in, it doesn’t get any further than “1”…
Any ideas?
Cheers for your help,
Crip_crop