Hello.
The attached code is a miniature, simplified part of a more complicated program. What strategies, solutions and instructions do you recommend in order to reduce its computation time with OpenACC?
Note that a slight decrease in the accuracy of the computed output is acceptable.
I look forward to hearing from you soon.
Kind regards,
Sajjad Mohammadi
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <openacc.h>
#include<time.h>
#include <string.h>
#include <malloc.h>
#define NX 201
#define NY 101
#define NZ 201
/*
 * Time-steps two coupled 3-D array updates inside an OpenACC data region and
 * logs one probe value of B per step to "BB-and-A.csv".
 *
 * Each step:
 *   1. A[i][j][k] += B[i][j][k] - AA[i][j][k-1]
 *   2. B[i][j][k] += BB[i][j][k] - A[i-1][j][k]
 *   3. a time-dependent pulse is evaluated for every interior k-index and
 *      written, negated, into the line A[10][60][*].
 * After the loop the processor time reported by clock() is printed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the output file cannot be opened
 * or closed cleanly.
 *
 * NOTE(review): if reduced precision is acceptable, converting the arrays to
 * float is a further option to evaluate; it is not done here because it
 * changes the numerical output.
 */
int main(void)
{
    enum { N_STEPS = 254 };              /* number of time steps */

    /* Static storage: the arrays are far too large for the stack and start
     * out zero-initialised. */
    static double AA[NX][NY][NZ], BB[NX][NY][NZ];
    static double A[NX][NY][NZ], B[NX][NY][NZ];
    static double cu[NZ];

    const double dt = 9.5e-9;            /* time increment per step */
    double t = 0.;
    double cpu_time_used;
    clock_t start, end;
    FILE *file;

    start = clock();

    file = fopen("BB-and-A.csv", "w");
    if (file == NULL) {
        perror("BB-and-A.csv");
        return EXIT_FAILURE;
    }

    /*
     * Keep every array resident on the device for the whole time loop.
     * AA and BB are only read, so they are copied in but never back;
     * cu is written before it is read within each step and is not used by
     * the host, so it needs no transfer at all.
     */
    #pragma acc data copy(A, B) copyin(AA, BB) create(cu)
    {
        for (int step = 0; step < N_STEPS; step++) {
            /* Every (i,j,k) writes only its own element of A and reads
             * B/AA, which this loop does not modify: fully independent. */
            #pragma acc parallel loop collapse(3) present(A, B, AA)
            for (int i = 1; i < NX - 1; i++) {
                for (int j = 0; j < NY - 1; j++) {
                    for (int k = 1; k < NZ - 1; k++) {
                        A[i][j][k] = A[i][j][k]
                            + 1. * (B[i][j][k] - AA[i][j][k - 1]);
                    }
                }
            }

            /* Same reasoning: B is written element-wise, A/BB are only
             * read here, so A[i-1] carries no dependency in this loop. */
            #pragma acc parallel loop collapse(3) present(A, B, BB)
            for (int i = 1; i < NX - 1; i++) {
                for (int j = 1; j < NY - 1; j++) {
                    for (int k = 0; k < NZ - 1; k++) {
                        B[i][j][k] = B[i][j][k]
                            + 1. * (BB[i][j][k] - A[i - 1][j][k]);
                    }
                }
            }

            /* tr and w are per-iteration temporaries; declaring them inside
             * the loop body makes them private to each iteration. */
            #pragma acc parallel loop present(A, cu)
            for (int m = 1; m < NZ - 1; m++) {
                const double tr = t - (double)(m) * 5 / 1.5e8;
                if (tr <= 0.) {
                    cu[m] = 0.;
                } else {
                    const double w = (tr / 0.25e-6) * (tr / 0.25e-6);
                    cu[m] = 1666 * w / (w + 1.) * exp(-tr / 2.5e-6);
                    cu[m] = 2 * cu[m];
                }
                A[10][60][m] = -cu[m];
            }

            /* Only one element of B is printed, so fetch just that element
             * instead of transferring the whole array every step. */
            #pragma acc update self(B[22:1][60:1][10:1])
            fprintf(file, "%e, %e \n", t * 1e6, -B[22][60][10]);

            t = t + dt;
        }
    }

    end = clock();
    cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
    printf("\n cpu time %5.5f sec ", cpu_time_used);

    /* fclose flushes buffered output; a failure means the CSV is incomplete. */
    if (fclose(file) != 0) {
        perror("BB-and-A.csv");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}