Inside a function I developed, I’m attempting to parallelize the following nested loops using OpenACC.
// For every (R, Z, Phi) node of the cylindrical grid, search the flux surface
// with index NRho_ - 1 for the (iTheta, iZeta) node closest to that grid point
// (squared Cartesian distance).
//
// The three grid loops are independent and are collapsed into one parallel
// iteration space. The two search loops carry a dependency through the running
// minimum (ddist2_min and the associated indices), so each of them is marked
// `loop seq` explicitly.
//
// NOTE(review): R3D_, Phi3D_ and Z3D_ are dereferenced inside the device region
// but are not listed in the present clause below. If their storage (and the
// enclosing object reached through `this`) has not been made device-resident
// elsewhere, that would be consistent with CUDA_ERROR_ILLEGAL_ADDRESS at run
// time -- confirm against the data-management code of this class.
#ifdef USE_OPENACC
#pragma acc parallel loop collapse(3) present( \
    RGrid_->values_[0:NRGrid_], ZGrid_->values_[0:NZGrid_], PhiGrid_->values_[0:NPhiGrid_], \
    isInsideTheLastCMS_->values_[0:NRGrid_*NZGrid_*NPhiGrid_], \
    iZetaMin_for_pointsInsideTheLastCMS_->values_[0:NRGrid_*NZGrid_*NPhiGrid_], \
    iZetaMax_for_pointsInsideTheLastCMS_->values_[0:NRGrid_*NZGrid_*NPhiGrid_] \
)
#else
#pragma omp parallel for collapse(3)
#endif
for (U iR = 0; iR < NRGrid_; iR++) {
    for (U iZ = 0; iZ < NZGrid_; iZ++) {
        for (U iPhi = 0; iPhi < NPhiGrid_; iPhi++) {
            // Define point coordinates
            T RPoint = RGrid_->values_[iR];
            T ZPoint = ZGrid_->values_[iZ];
            T PhiPoint = PhiGrid_->values_[iPhi];

            // Convert to Cartesian coordinates
            T XPoint = RPoint * std::cos(PhiPoint);
            T YPoint = RPoint * std::sin(PhiPoint);

            // Initialize variables (all declared inside the loop body, so each
            // parallel iteration works on its own copies)
            U iRho = NRho_ - 1;
            U iZeta_FC_1 = 0, iZeta_FC_2 = 0;
            U iTheta_FC_1_p1 = 0, iTheta_FC_1_p2 = 0, iTheta_FC_2_p = 0;
            T X3d, Y3d, Z3d;
            T ddist2_min, ddist2;

            // Compute initial distance, using surface node (iRho, 0, 0) as the
            // starting candidate; this matches the zero-initialized indices above
            {
                const T R0 = (*R3D_)(iRho, 0, 0);
                const T Phi0 = (*Phi3D_)(iRho, 0, 0);
                X3d = R0 * std::cos(Phi0);
                Y3d = R0 * std::sin(Phi0);
                Z3d = (*Z3D_)(iRho, 0, 0);
            }
            ddist2_min = (X3d - XPoint) * (X3d - XPoint) +
                         (Y3d - YPoint) * (Y3d - YPoint) +
                         (Z3d - ZPoint) * (Z3d - ZPoint);

            // Find the closest point on LCMS
#ifdef USE_OPENACC
#pragma acc loop seq
#endif
            for (U iTheta = 0; iTheta < NTheta_; iTheta++) {
#ifdef USE_OPENACC
#pragma acc loop seq
#endif
                for (U iZeta = 0; iZeta < NZeta_; iZeta++) {
                    // Read each surface value once instead of once per use
                    const T RSurf = (*R3D_)(iRho, iTheta, iZeta);
                    const T PhiSurf = (*Phi3D_)(iRho, iTheta, iZeta);
                    X3d = RSurf * std::cos(PhiSurf);
                    Y3d = RSurf * std::sin(PhiSurf);
                    Z3d = (*Z3D_)(iRho, iTheta, iZeta);
                    ddist2 = (X3d - XPoint) * (X3d - XPoint) +
                             (Y3d - YPoint) * (Y3d - YPoint) +
                             (Z3d - ZPoint) * (Z3d - ZPoint);
                    // Strict '<' keeps the first node encountered on ties
                    if (ddist2 < ddist2_min) {
                        ddist2_min = ddist2;
                        iTheta_FC_1_p1 = iTheta;
                        iZeta_FC_1 = iZeta;
                    }
                }
            }
        }
    }
}
During compilation I don’t have any problems. However, when executing the code, I receive the following message:
Failing in Thread:1
Accelerator Fatal Error: call to cuStreamSynchronize returned error 700 (CUDA_ERROR_ILLEGAL_ADDRESS): Illegal address during kernel execution
File: /home/homam/Documents/MyCodes/SLIM/GoAHead/ToroidalSolver/FluxSurfaces3DReduced.h
Function: _ZN21FluxSurfaces3DReducedIjdE39locateCylGridPointsWithRespectToLastCMSEv:1378
Line: 1409
I don’t understand the source of this error. Moreover, the error disappears when I comment out the following lines in the code above.
ddist2_min = ddist2;
iTheta_FC_1_p1 = iTheta;
iZeta_FC_1 = iZeta;
Do you have an idea what is happening? Did I make a mistake in the way I’m parallelizing these 5 nested loops?
Thanks.