void cSlideFbndry::service( Int nxr, Int nvr, Int nqr, Real tm, Real *sxr, Real **sqr, Int iqs, Int iqe, Real *sxq, Real *sq, Real *saux, Real *sdxdx, Real *sdqdx, Int ibs, Int ibe, Int *sibq, Real *sxb, Real *sqb, Real *sauxb, Int nq, Int nbb ) { //cout << "sliding plane services " << this << "\n"; Int nx,iv,ix,ib,iq,it,ir,is0; Real y[3]; Int is, asct; Real *lsqr; //if(!bposix) cout << "mutex free sliding plane\n"; Int n; nx= coo->getnx(); *sqr= new Real[nqr*(nvr+1)]; //qr= new Real*[nvr+1]; //subv( nvr+1,nqr,*sqr,qr ); lsqr = *sqr; if( ibe > ibs ) { #pragma acc enter data copyin(lsqr[0:(nvr+1)*nqr],sxr[0:nxr*nqr]) n= ibe-ibs; cTabData data; coo->get( &data ); data.get( "assembly-sectors", &asct ); ptch = pi2/(Real)asct; #pragma acc update device(ptch) // cout << "the pitch is " << ptch << " with number of repetitive sectors " << asct << "\n"; n = asct*n; if( !kdt ) { //cout << "create kd tree............\n"; Real *tmp[3]; Real *stmp, *stmp1; if( n>0 ) { #ifdef _OPENACC cudaMallocManaged(&kdt,sizeof(cKdTree)); #else kdt= new cKdTree(); #endif idt= new Int[n]; isec= new Int[n]; stmp= new Real[nxr*n]; stmp1= new Real[nxr*n]; #pragma acc enter data copyin(idt[0:n],isec[0:n],stmp[0:nxr*n],stmp1[0:nxr*n]) #pragma acc parallel loop gang vector\ present(sibq[0:nbb],stmp[0:nxr*n],stmp1[0:nxr*n],sxq[0:nxr*nq],this) \ default(none) for( ib=ibs;ibtoffset( 0,ibe-ibs, 1, ptch, tmp1 ); //coo->toffset( 0,ibe-ibs, 1, ptch, stmp1, n ); coo->toffsetgpu( 0,ibe-ibs, 1, ptch, stmp1, n, nx ); #pragma acc parallel loop\ present(sibq[0:nbb],stmp[0:nx*n],stmp1[0:nx*n],idt[0:n],isec[0:n],this) \ default(none) for( ib=ibs; ibbuild( nx,n, tmp ); delete[] stmp; stmp=NULL; delete[] stmp1; stmp1=NULL; #pragma acc exit data delete(stmp,stmp1) } } coo->toffsetgpu( 0,nqr, 1,-tm, sxr, nqr, nxr ); #pragma acc parallel loop gang vector\ private(iq,is) \ present(lsqr[0:(nvr+1)*nqr],idt[0:n],isec[0:n],kdt[:1],sxr[0:nxr*nqr],sq[0:nvr*nq],this) \ default(none) for( ir=0;irnearest( x0,&ipmin,&d ); iq = idt[ipmin]; is = isec[ipmin]; for(iv=0; ivtoffsetgpu( 0,nqr, 1,tm, sxr, nqr, nxr ); coo->toffsetgpu( 0,nqr, 1,tm, lsqr, nqr, nvr+1 ); #pragma acc exit data copyout(lsqr[0:(nvr+1)*nqr]) #pragma acc exit data delete(sxr[0:nx*nqr]) } else { // cout << "no points on this boundary ...\n"; #pragma acc enter data copyin(lsqr[0:(nvr+1)*nqr]) #pragma acc parallel loop gang vector\ present(lsqr[0:(nvr+1)*nqr]) \ default(none) for( ir=0;ir