Hi,
I am new using OpenAcc and I am trying to accelerate the following subroutine:
void Build_ERIS(vector<double> &eris, vector<Atomic_Orbital> &Basis)
{
/*Computing the totalnumber of unique repulsion integrals*/
int basis_size = Basis.size();
int m = basis_size*(basis_size+1)/2;
eris.resize(m*(m+1)/2);
bool compute;
std::fill(eris.begin(), eris.end(), 0);
#pragma acc data copy(Basis) copy(eris)
int i_orbital,j_orbital, k_orbital,l_orbital, i_primitive, j_primitive, k_primitive,l_primitive,ij,kl, ijkl,ijij,klkl;
#pragma acc kernels
{ //This is the line 217 in the error message.
for(i_orbital=0; i_orbital<basis_size; i_orbital++){
for(j_orbital=0; j_orbital<i_orbital+1; j_orbital++){
ij = i_orbital*(i_orbital+1)/2 + j_orbital;
for(k_orbital=0; k_orbital<basis_size; k_orbital++){
for(l_orbital=0; l_orbital<k_orbital+1; l_orbital++){
kl = k_orbital*(k_orbital+1)/2 + l_orbital;
if (ij >= kl) {
ijkl = composite_index(i_orbital,j_orbital,k_orbital,l_orbital);
ijij = composite_index(i_orbital,j_orbital,i_orbital,j_orbital);
klkl = composite_index(k_orbital,l_orbital,k_orbital,l_orbital);
//#pragma omp parallel for schedule(dynamic)
for(i_primitive=0; i_primitive<Basis[i_orbital].contraction.size; i_primitive++)
for(j_primitive=0; j_primitive<Basis[j_orbital].contraction.size; j_primitive++)
for(k_primitive=0; k_primitive<Basis[k_orbital].contraction.size; k_primitive++)
for(l_primitive=0; l_primitive<Basis[l_orbital].contraction.size; l_primitive++)
eris[ijkl] +=
normconst(Basis[i_orbital].contraction.exponent[i_primitive],Basis[i_orbital].angular.l, Basis[i_orbital].angular.m, Basis[i_orbital].angular.n)*
normconst(Basis[j_orbital].contraction.exponent[j_primitive],Basis[j_orbital].angular.l, Basis[j_orbital].angular.m, Basis[j_orbital].angular.n)*
normconst(Basis[k_orbital].contraction.exponent[k_primitive],Basis[k_orbital].angular.l, Basis[k_orbital].angular.m, Basis[k_orbital].angular.n)*
normconst(Basis[l_orbital].contraction.exponent[l_primitive],Basis[l_orbital].angular.l, Basis[l_orbital].angular.m, Basis[l_orbital].angular.n)*
Basis[i_orbital].contraction.coef[i_primitive]*
Basis[j_orbital].contraction.coef[j_primitive]*
Basis[k_orbital].contraction.coef[k_primitive]*
Basis[l_orbital].contraction.coef[l_primitive]*
ERI_int(Basis[i_orbital].contraction.center.x, Basis[i_orbital].contraction.center.y, Basis[i_orbital].contraction.center.z, Basis[i_orbital].contraction.exponent[i_primitive],Basis[i_orbital].angular.l, Basis[i_orbital].angular.m, Basis[i_orbi\
tal].angular.n,
Basis[j_orbital].contraction.center.x, Basis[j_orbital].contraction.center.y, Basis[j_orbital].contraction.center.z, Basis[j_orbital].contraction.exponent[j_primitive],Basis[j_orbital].angular.l, Basis[j_orbital].angular.m, Basis[j_orbi\
tal].angular.n,
Basis[k_orbital].contraction.center.x, Basis[k_orbital].contraction.center.y, Basis[k_orbital].contraction.center.z, Basis[k_orbital].contraction.exponent[k_primitive],Basis[k_orbital].angular.l, Basis[k_orbital].angular.m, Basis[k_orbi\
tal].angular.n,
Basis[l_orbital].contraction.center.x, Basis[l_orbital].contraction.center.y, Basis[l_orbital].contraction.center.z, Basis[l_orbital].contraction.exponent[l_primitive],Basis[l_orbital].angular.l, Basis[l_orbital].angular.m, Basis[l_orbi\
tal].angular.n);
/**/
}
}
}
}
}
}
}
But when I compile teh code using the flags:
CFLAGS= -U__GNUG__ -fast -acc -Minfo=accel -ta=tesla:manage -larmadillo -lgsl -w -std=c++11
pgc++ fails accelerating the code and I get the error:
PGCC-S-0155-Compiler failed to translate accelerator region (see -Minfo messages): Could not find allocated-variable index for symbol (engine2.cpp: 217)
Build_ERIS(std::vector<double, std::allocator<double>> &, std::vector<Atomic_Orbital, std::allocator<Atomic_Orbital>>&):
214, Generating copy(Basis[:],eris[:])
218, Loop is parallelizable
219, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
218, #pragma acc loop gang /* blockIdx.y */
219, #pragma acc loop gang /* blockIdx.x */
222, #pragma acc loop vector(32) /* threadIdx.x */
221, Loop is parallelizable
222, Loop is parallelizable
235, Inner sequential loop scheduled on accelerator
PGCC/x86 Linux 15.7-0: compilation completed with severe errors
How can I surpass this problem?
The line 217 was shown in the code above. What is going wrong with this line?