For a Jacobi iteration, I’m trying to move the computation loops into a separate function. The code is:
#include <math.h>
#include <string.h>
#include "timer.h"
#include <stdio.h>
/* Grid dimensions: NN is the extent of the first array index, NM of the second. */
#define NN 1024
#define NM 1024
/* Current solution grid; main() zeroes it and then sets column 0 to 1.0. */
float A[NN][NM];
/* Second grid that receives the freshly averaged values during each sweep. */
float Anew[NN][NM];
/* Loop bounds used by the sweeps; same values as NN and NM. */
const int n = NN;
const int m = NM;
/* Upper limit on the number of sweeps performed by main(). */
const int iter_max = 1000;
/* main() keeps iterating only while `error` is greater than this value. */
const double tol = 1.0e-6;
/* Largest |Anew - A| found by the most recent sweep; starts at 1.0 so that
   the first loop test in main() passes. */
double error = 1.0;
void testfunction(float A[NN][NM], float Anew[NN][NM])
{
#pragma acc data present(A, Anew)
#pragma acc kernels
{
error = 0.0;
for( int j = 1; j < n-1; j++)
{
for( int i = 1; i < m-1; i++ )
{
Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
+ A[j-1][i] + A[j+1][i]);
error = fmax( error, fabs(Anew[j][i] - A[j][i]));
}
}
for( int j = 1; j < n-1; j++)
{
for( int i = 1; i < m-1; i++ )
{
A[j][i] = Anew[j][i];
}
}
}
}
int main(int argc, char** argv)
{
memset(A, 0, n * m * sizeof(float));
memset(Anew, 0, n * m * sizeof(float));
for (int j = 0; j < n; j++)
{
A[j][0] = 1.0;
Anew[j][0] = 1.0;
}
printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
StartTimer();
int iter = 0;
#pragma acc data copy(A), create(Anew)
while ( error > tol && iter < iter_max )
{
testfunction(A, Anew);
if(iter % 100 == 0) printf("%5d, %0.6f\n", iter, error);
iter++;
}
double runtime = GetTimer();
printf(" total: %f s\n", runtime / 1000);
}
But there seem to be some problems; the compiler reports:
shawnl@shawnl-ubuntu:~/Desktop/OpenACC_Projects/test$ pgc++ -acc -Minfo=accel -ta=nvidia test2Dfunction_2modi.cpp
testfunction(float (*)[1024], float (*)[1024]):
22, Generating present(A[:][:],Anew[:][:])
25, Complex loop carried dependence of A->,Anew-> prevents parallelization
Accelerator scalar kernel generated
27, Complex loop carried dependence of A->,Anew-> prevents parallelization
35, Complex loop carried dependence of Anew->,A-> prevents parallelization
Accelerator scalar kernel generated
37, Complex loop carried dependence of Anew->,A-> prevents parallelization
main:
64, Generating copy(A[:][:])
Generating create(Anew[:][:])
My questions are:
Does the present clause work for 2D/3D arrays in a function?
How should data movement be managed inside a function?
Or,
How can the restrict keyword be applied to 2D/3D arrays?
Should the arrays be rewritten in 1D form?
Many thanks for helping.