zero output when OpenACC is used

Hi
I use PGI community edition 17.10 to compile and run fallowing code. why the output will be zero when I add directives of OpenACC?
may you if help me why it’s happen?


my code with OpenACC directives is in fallowing paragraphe

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <openacc.h>
#include<time.h>
#include <string.h>
#include <cuda_runtime_api.h>
#define NX 201
#define NY 101
#define NZ 201
int main(void)
{
static int i, j, k, l, m;
static double vr, tr, w1, w2;
static double dt, t, comp;
static double cu[NZ];
static double ex[NX][NY][NZ] = { 0. }, JJ[NX][NY][NZ] = { 0. }, BB[NX][NY][NZ] = { 0. };
static double A[NX][NY][NZ] = { 0. }, B[NX][NY][NZ] = { 0. }, C[NX][NY][NZ] = { 0. };
clock_t start, end;
double cpu_time_used;
start = clock();
vr = 1.5e8;
dt = 9.5e-9;
FILE file;
file = fopen(“BB-and-A.csv”, “w”);
//#pragma acc data copyin( i, j, k, l, m,vr, tr, w1, w2,dt, t, comp,ex, JJ,A, C),copyout (t,cu,BB,B)
//{
t = 0.;
for (l = 0; l < 737; l++) {
//#pragma acc kernels loop private(i, j, k)
for (i = 0; i < NX - 1; i++) {
for (j = 1; j < NY - 1; j++) {
for (k = 1; k < NZ - 1; k++) {
ex_[j][k] = 1.
ex[j][k]

      • (C[j][k] - C[j - 1][k])
      • (B[j][k] - B[j][k - 1]);
        }
        }
        }
        //#pragma acc kernels loop private(i,j,k)
        for (i = 1; i < NX - 1; i++) {
        for (j = 0; j < NY - 1; j++) {
        for (k = 1; k < NZ - 1; k++) {
        JJ[j][k] = 1. * JJ[j][k]
      • (A[j][k] - A[j][k - 1])
      • (C[i][j][k] - C[i - 1][j][k]);
        }
        }
        }
        //#pragma acc kernels loop private(i,j,k)
        for (i = 1; i < NX - 1; i++) { /* BB */
        for (j = 1; j < NY - 1; j++) {
        for (k = 0; k < NZ - 1; k++) {
        BB[i][j][k] = 1. * BB[i][j][k]
      • (B[i][j][k] - B[i - 1][j][k])
      • (A[i][j][k] - A[i][j - 1][k]);
        }
        }
        }
        //#pragma acc kernels loop private(i,j,k)
        for (i = 1; i < NX - 1; i++) {
        for (j = 0; j < NY - 1; j++) {
        for (k = 0; k < NZ - 1; k++) {
        A[i][j][k] = A[i][j][k]
  • 0.0015 * (BB[i][j + 1][k] - BB[i][j][k])
  • 0.0015 * (JJ[i][j][k + 1] - JJ[i][j][k]);
    }
    }
    }
    //#pragma acc kernels loop private(i,j,k)
    for (i = 0; i < NX - 1; i++) { /* C */
    for (j = 1; j < NY - 1; j++) {
    for (k = 0; k < NZ - 1; k++) {
    B[i][j][k] = B[i][j][k]
  • 0.0015 * (ex[i][j][k + 1] - ex[i][j][k])
  • 0.0015 * (BB[i + 1][j][k] - BB[i][j][k]);
    }
    }
    }
    //#pragma acc kernels loop private(i,j,k)
    for (i = 0; i < NX - 1; i++) {
    for (j = 0; j < NY - 1; j++) {
    for (k = 1; k < NZ - 1; k++) {
    C[i][j][k] = C[i][j][k]
  • 0.0015 * (JJ[i + 1][j][k] - JJ[i][j][k])
  • 0.0015 * (ex[i][j + 1][k] - ex[i][j][k]);
    }
    }
    }
    for (m = 10; m < NZ - 1; m++) {
    tr = t - (double)(m)5 / vr;
    if (tr <= 0.)
    cu[m] = 0.;
    else {
    w1 = (tr / 0.25e-6)
    (tr / 0.25e-6);
    w2 = (tr / 2.e-6)(tr / 2.e-6);
    cu[m] = 10700. / 0.639
    w1 / (w1 + 1.)exp(-tr / 2.5e-6) + 6500. / 0.867w2 / (w2 + 1.)exp(-tr / 230.e-6);
    cu[m] = (exp((double)(m)
    -5. / 2000.))*cu[m];
    }
    A[10][60][m] = -cu[m] / (4.*5.);
    A[10][59][m] = cu[m] / (4.*5.);
    B[10][60][m] = cu[m] / (4.*5.);
    B[9][60][m] = -cu[m] / (4.*5.);
    }
    comp = ((double)(l + 1) / 737)100.;
    printf(“Computation: %4.3f %% completed \r”, comp);
    fprintf(file, “%e, %e, %e, %e \n”, t
    1e6, cu[30] / 1000., -BB[30][60][10] / 1000., -(B[109][60][10] + B[110][60][10]) / 2.);
    t = t + dt;
    }
    //}
    fclose(file);
    end = clock();
    cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
    printf("\n cpu time %5.5f sec ", cpu_time_used);
    }

    Thanks in advance,
    sajad_