Hi,
I get some strange behavior with the following code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/**
* Calculates the Jacobi-preconditioner to the given matrix in ELLPACK-R format
* C = diag(A[1][1], ..., A[n][n])
* as array of dimension n.
*/
void jacobiPreconditioner (const int n, const int maxNNZ, const double* data, const int* indices, double* C) {
int i;
#pragma acc parallel loop present(data[0:n*maxNNZ], indices[0:n*maxNNZ], C[0:n])
for (i = 0; i < n; i++) {
int j;
/* go through indices-array until column=row */
for (j = 0; indices[j * n + i] != i; j++);
/* set C[i] = (a_ii)^-1 */
int k = j * n + i;
#if 1 // 0 works, 1 doesn't
C[i] = 1.0 / data[k];
#else
if (k > 0 && k < maxNNZ * n) {
C[i] = 1.0 / data[k];
}
else {
C[i] = 1.0 / data[k];
}
#endif
}
}
int main () {
const int n = 1391349;
const int maxNNZ = 249;
// malloc arrays
double* data = (double*) malloc(n * maxNNZ * sizeof(double));
int* indices = (int*) malloc(n * maxNNZ * sizeof(double));
double* C = (double*) malloc(n * sizeof(double));
// init data and indices arrays with some values
int i, j;
for (i = 0; i < n; i++) {
for (j = 0; j < maxNNZ; j++) {
int k = j * n + i;
data[k] = 42;
if ( j == (maxNNZ - 1) ) {
indices[k] = i;
}
else {
indices[k] = 0;
}
}
}
#pragma acc data copyin (data[0:n*maxNNZ], indices[0:n*maxNNZ]) create(C[0:n])
{
jacobiPreconditioner (n, maxNNZ, data, indices, C);
}
free(data);
free(indices);
free(C);
return 0;
}
This should calculate the jacobi-preconditioner of an Ellpack-R matrix.
But if i directly set
C[i] = 1.0 / data[k];
an error occurs:
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
If I first check, if k is in the bounds and do the same in both cases (even if it is not), the code works fine.
Is this a compiler bug?
I compiled with “pgcc -fastsse -mp -acc -Minfo=accel -ta=nvidia,pin,cc20 -c main.c”.
Thanks in advance,
Fabian