Compiler failed to translate accelerator region

Hello,
I have been trying to use openACC on my laptop, MSI GP63 8DR, GTX 1050 ti. I have the following code to compile and I am pretty sure the code is correct since I have tried it on an Online lab where I am taking a course.

The compiler works perfectly for CPU, but when I tried the GPU, the following syntax, it gives the next error.

syntax:
pgcc -fast -ta=tesla:cc60,managed -Minfo=accel -o laplace jacobi.c laplace2d.c && echo “Compilation Successful” && ./laplace

Error:

pgcc-Warning-The -ta=tesla:managed option is not supported for win-64 systems

jacobi.c:
laplace2d.c:
PGC-S-0155-Compiler failed to translate accelerator region (see -Minfo messages): Could not find allocated-variable index for symbol (laplace2d.c: 47)
calcNext:
47, Accelerator kernel generated
Generating Tesla code
48, #pragma acc loop gang /* blockIdx.x /
Generating reduction(max:error)
50, #pragma acc loop vector(128) /
threadIdx.x */
48, Accelerator restriction: size of the GPU copy of Anew,A is unknown
50, Loop is parallelizable
PGC-F-0704-Compilation aborted due to previous errors. (laplace2d.c)
PGC/x86-64 Windows 18.4-0: compilation aborted

It looks like there is a compatibility issue, well, I don’t know how to fix it without downgrading my OS.

Thank you for reading.


The following is the code I want t compile.

Header ----------------

void initialize(double *restrict A, double *restrict Anew, int m, int n);

double calcNext(double *restrict A, double *restrict Anew, int m, int n);

void swap(double *restrict A, double *restrict Anew, int m, int n);

void deallocate(double *restrict A, double *restrict Anew);

main--------------------------

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include “laplace2d.h”

int main(int argc, char** argv)
{
const int n = 4096;
const int m = 4096;
const int iter_max = 1000;

const double tol = 1.0e-6;
double error = 1.0;

double restrict A = (double)malloc(sizeof(double)nm);
double restrict Anew = (double)malloc(sizeof(double)nm);

initialize(A, Anew, m, n);

printf(“Jacobi relaxation Calculation: %d x %d mesh\n”, n, m);

double st = omp_get_wtime();
int iter = 0;

while ( error > tol && iter < iter_max )
{
error = calcNext(A, Anew, m, n);
swap(A, Anew, m, n);

if(iter % 100 == 0) printf(“%5d, %0.6f\n”, iter, error);

iter++;

}

double runtime = omp_get_wtime() - st;

printf(" total: %f s\n", runtime);

deallocate(A, Anew);

return 0;
}

Function------------------------------------
#include <math.h>
#include <stdlib.h>

#define OFFSET(x, y, m) (((x)*(m)) + (y))

void initialize(double *restrict A, double *restrict Anew, int m, int n)
{
memset(A, 0, n * m * sizeof(double));
memset(Anew, 0, n * m * sizeof(double));

for(int i = 0; i < m; i++){
A _= 1.0;
Anew = 1.0;
}
}

double calcNext(double *restrict A, double *restrict Anew, int m, int n)
{
double error = 0.0;
#pragma acc parallel loop reduction(max:error)
for( int j = 1; j < n-1; j++)
{
for( int i = 1; i < m-1; i++ )
{
Anew[OFFSET(j, i, m)] = 0.25 * ( A[OFFSET(j, i+1, m)] + A[OFFSET(j, i-1, m)]

  • A[OFFSET(j-1, i, m)] + A[OFFSET(j+1, i, m)]);
    error = fmax( error, fabs(Anew[OFFSET(j, i, m)] - A[OFFSET(j, i , m)]));
    }
    }
    return error;
    }

    void swap(double *restrict A, double *restrict Anew, int m, int n)
    {
    #pragma acc parallel loop
    for( int j = 1; j < n-1; j++)
    {
    for( int i = 1; i < m-1; i++ )
    {
    A[OFFSET(j, i, m)] = Anew[OFFSET(j, i, m)];
    }
    }
    }

    void deallocate(double *restrict A, double *restrict Anew)
    {
    free(A);
    free(Anew);
    }
    ---------------------------------------------_

Hi rapastranac,

Currently we do not support the -ta=tesla:managed memory on Windows.

The managed option will automatically manage memory such that it is available to the GPU when needed (though sometimes it will do it in an suboptimal way). Without it, you’ll need to manually move the data with data directives

You could surround the while loop with the appropriate clauses. The specification (Specification | OpenACC) has a full listing of the data clauses. In this case, since Anew doesn’t look like it is needed by the host, it can be a created solely on the GPU

For example,

#pragma acc data copy(A) create(Anew)
{
while ...
}