Judging by which changes make this error go away, I think it is related to interference from the compiler's handling of OpenMP target (device) functions, combined with the apparent lack of support for printf in device code. However, nothing in this code indicates that nothing() runs on a device.
/opt/pgi/linux86-64/2019/bin/pgcc -c11 -g -O2 -mp pgi-stencil-target2.c
PGC-S-0077-Character string too long for array (pgi-stencil-target2.c: 33)
PGC-S-0077-Character string too long for array (pgi-stencil-target2.c: 33)
PGC-S-0077-Character string too long for array (pgi-stencil-target2.c: 33)
PGC/x86-64 Linux 19.10-0: compilation completed with severe errors
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
/*
 * Radius-2 star stencil on an n x n row-major grid.
 *
 * For every point (i,j) with 2 <= i,j < n-2, accumulates into out[i*n+j] a
 * weighted sum of the four neighbours at distance 1 (weights -/+0.25) and the
 * four neighbours at distance 2 (weights -/+0.125) along each axis of `in`.
 * Points closer than 2 cells to the border are left untouched.
 *
 * n   - linear grid dimension
 * in  - input grid, n*n doubles, read only
 * out - output grid, n*n doubles, updated in place (+=)
 */
void star2(const int n, const double * restrict in, double * restrict out) {
    #pragma omp target teams distribute parallel for simd collapse(2) schedule(static,1)
    for (int i=2; i<n-2; i++) {
        for (int j=2; j<n-2; j++) {
            out[i*n+j] += +in[(i-2)*n+(j+0)] * -0.125
                          +in[(i-1)*n+(j+0)] * -0.25
                          +in[(i+0)*n+(j-2)] * -0.125
                          +in[(i+0)*n+(j-1)] * -0.25
                          +in[(i+0)*n+(j+1)] * 0.25
                          +in[(i+0)*n+(j+2)] * 0.125
                          +in[(i+1)*n+(j+0)] * 0.25
                          +in[(i+2)*n+(j+0)] * 0.125;
        }
    }
}
// Pointer to a stencil kernel taking (int, read-only input array, output array);
// star2() and nothing() both have this signature, and main() calls the selected
// kernel through a variable of this type.
typedef void (*stencil_t)(const int, const double * restrict, double * restrict);
/*
 * Fallback stencil with the stencil_t signature. main() uses it as the
 * initial value of its stencil pointer, so it runs when no other kernel was
 * selected. It never returns normally: it always ends in abort().
 *
 * n   - grid dimension; only inspected by the n==0 branch below
 * in  - unused
 * out - unused
 *
 * The commented-out printf calls are the diagnostics that were disabled
 * in this version of the function.
 */
void nothing(const int n, const double * restrict in, double * restrict out)
{
    //printf("You are trying to use a stencil that does not exist.\n");
    //printf("Please generate the new stencil using the code generator.\n");
    // n will never be zero - this is to silence compiler warnings.
    //if (n==0) printf("%p %p\n", in, out);
    // The active code never touches the arrays; discard them explicitly so
    // unused-parameter warnings stay quiet.
    (void)in;
    (void)out;
    if (n==0) printf("1");
    abort();
}
int main(int argc, char * argv)
{
printf(“Parallel Research Kernels version\n”);
printf(“C11/OpenMP TARGET Stencil execution on 2D grid\n”);
//////////////////////////////////////////////////////////////////////
// Process and test input parameters
//////////////////////////////////////////////////////////////////////
if (argc < 3){
printf(“Usage: <# iterations> [<star/grid> ]\n”);
return 1;
}
// number of times to run the algorithm
int iterations = atoi(argv[1]);
if (iterations < 1) {
printf(“ERROR: iterations must be >= 1\n”);
return 1;
}
// linear grid dimension
int n = atoi(argv[2]);
if (n < 1) {
printf(“ERROR: grid dimension must be positive\n”);
return 1;
} else if (n > floor(sqrt(INT_MAX))) {
printf(“ERROR: grid dimension too large - overflow risk\n”);
return 1;
}
// stencil pattern
bool star = true;
if (argc > 3) {
char* pattern = argv[3];
star = (0==strncmp(pattern,“star”,4)) ? true : false;
}
// stencil radius
int radius = 2;
if (argc > 4) {
radius = atoi(argv[4]);
}
if ( (radius < 1) || (2*radius+1 > n) ) {
printf(“ERROR: Stencil radius negative or too large\n”);
return 1;
}
#ifdef _OPENMP
printf(“Number of threads (max) = %d\n”, omp_get_max_threads());
#endif
printf(“Number of iterations = %d\n”, iterations);
printf(“Grid sizes = %d\n”, n);
printf(“Type of stencil = %s\n”, (star ? “star” : “grid”) );
printf(“Radius of stencil = %d\n”, radius );
stencil_t stencil = nothing;
if (star) {
switch (radius) {
case 2: stencil = star2; break;
}
}
//////////////////////////////////////////////////////////////////////
// Allocate space and perform the computation
//////////////////////////////////////////////////////////////////////
double stencil_time = 0.0;
size_t bytes = nnsizeof(double);
double * restrict in = malloc(bytes);
double * restrict out = malloc(bytes);
// HOST
// initialize the input and output arrays
{
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
in[i*n+j] = (double)(i+j);
out[i*n+j] = 0.0;
}
}
}
// DEVICE
//#pragma omp target data map(tofrom: in[0:nn], out[0:nn])
{
for (int iter = 0; iter<=iterations; iter++) {
if (iter==1) stencil_time = omp_get_wtime();
// Apply the stencil operator
stencil(n, in, out);
// Add constant to solution to force refresh of neighbor data, if any
#pragma omp target teams distribute parallel for simd collapse(2) schedule(static,1)
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
in[i*n+j] += 1.0;
}
}
}
stencil_time = omp_get_wtime() - stencil_time;
}
//////////////////////////////////////////////////////////////////////
// Analyze and output results.
//////////////////////////////////////////////////////////////////////
// interior of grid with respect to stencil
size_t active_points = (n-2radius)(n-2*radius);
// compute L1 norm in parallel
double norm = 0.0;
for (int i=radius; i<n-radius; i++) {
for (int j=radius; j<n-radius; j++) {
norm += fabs(out[i*n+j]);
}
}
norm /= active_points;
free(in);
free(out);
// verify correctness
const double epsilon = 1.0e-8;
double reference_norm = 2.(iterations+1.);
if (fabs(norm-reference_norm) > epsilon) {
printf(“ERROR: L1 norm = %lf Reference L1 norm = %lf\n”, norm, reference_norm);
return 1;
} else {
printf(“Solution validates\n”);
#ifdef VERBOSE
printf(“L1 norm = %lf Reference L1 norm = %lf\n”, norm, reference_norm);
#endif
const int stencil_size = star ? 4radius+1 : (2radius+1)(2radius+1);
size_t flops = (2stencil_size+1) * active_points;
double avgtime = stencil_time/iterations;
printf(“Rate (MFlops/s): %lf Avg time (s): %lf\n”, 1.0e-6 * (double)flops/avgtime, avgtime );
}
return 0;
}