barash
June 14, 2015, 12:45pm
1
Hello!
The following very simple example behaves differently with and without the debugging option “-g”. How can this be?
#include<stdio.h>
void main(){
int i,j;
double p[10][10];
#pragma acc enter data create(p)
for(i=0;i<10;i++) for(j=0;j<10;j++) p[i][j]=3.;
#pragma acc update device(p)
#pragma acc data present(p[0:10][0:10])
{
#pragma acc region
{
#pragma acc loop gang vector collapse(2) independent
for(i=0;i<10;i++) {
for(j=0;j<2;j++) {
p[i][j] *= -1. ;
}
for(j=7;j<9;j++) {
p[i][j] *= -1. ;
}
}
}}
#pragma acc update host(p)
printf("p[0][0]=%f\n",p[0][0]);
#pragma acc exit data delete(p)
}
The result is as follows:
$ pgcc -ta=host -o b b.c
$./b
p[0][0]=-3.000000
$ pgcc -acc -ta=tesla -g -o b b.c
$ ./b
p[0][0]=3.000000
$ pgcc -acc -ta=tesla -o b b.c
$ ./b
p[0][0]=-3.000000
$
barash
June 15, 2015, 8:24pm
2
Below is another, more serious example. It seems that the compiler works incorrectly.
#include<stdio.h>
/*
 * Reproducer 2 kernel: for every (i, k) pair, writes negated copies of
 * F1[i][0][k][6] and F1[i][8][k][6] into neighbouring j-planes of F1, and
 * zeroes F2[i][0][k][0..9] and F2[i][9][k][0..9].
 *
 * Both arrays are declared present, i.e. the caller must already have them
 * on the device.
 *
 * F1, F2: arrays whose trailing dimensions are [10][10][10]; the present
 *         clause states a leading extent of 10 as well.
 */
void fix(double F1[][10][10][10], double F2[][10][10][10])
{
#pragma acc data present( F1[0:10][0:10][0:10][0:10], \
F2[0:10][0:10][0:10][0:10] )
{
#pragma acc region
{
/* j is declared but never used in this function. */
int i,j,k,m ;
/* The i- and k-loops are the two levels named by collapse(2). */
#pragma acc loop gang vector collapse(2) independent
for(i=0;i<10;i++) {
for(k=0;k<10;k++) {
/*
 * NOTE(review): index -1 is outside the [0,10) range of the second
 * dimension. With row-major layout F1[i][-1][k][6] addresses the same
 * element as F1[i-1][9][k][6], which iteration i-1 writes on the next
 * line, and for i == 0 it lies before the start of the array. That
 * contradicts the "independent" assertion on the loop.
 */
F1[i][-1][k][6] = -F1[i][0][k][6] ;
F1[i][9][k][6] = -F1[i][8][k][6] ;
/* Zero the first and last j-planes of F2 for this (i, k). */
for(m=0;m<10;m++) F2[i][9][k][m] = F2[i][0][k][m] = 0. ;
}
}
}
}
}
void main(){
double f1[10][10][10][10];
double f2[10][10][10][10];
#pragma acc enter data create(f1,f2)
int i,j,k,m;
for(i=0;i<10;i++)for(j=0;j<10;j++)
for(k=0;k<10;k++)for(m=0;m<10;m++)
{ f1[i][j][k][m]=i+j+k+m; f2[i][j][k][m]=i*j*k*m;}
#pragma acc update device(f1,f2)
fix(f1,f2);
#pragma acc update host(f1,f2)
double sum=0;
for(i=0;i<10;i++)for(j=0;j<10;j++)
for(k=0;k<10;k++)for(m=0;m<10;m++)
sum+= (f1[i][j][k][m]+f2[i][j][k][m]);
printf("%f\n",sum);
#pragma acc exit data delete(f1,f2)
}
The result is as follows:
$ pgcc -ta=host -o ex ex.c
$ ./ex
3456430.000000
$ pgcc -acc -ta=tesla -o ex ex.c
$ ./ex
3455800.000000
$
Why do the results differ?
Hi barash,
Did you mean to use a “-1” as an index?
F1[i][-1][k][6] = -F1[i][0][k][6]
This may be causing a race condition since you’re writing out of the bounds of this dimension.
tull
June 15, 2015, 11:27pm
4
The first problem has been filed as TPR 21729.
It fails as you described with -ta=tesla -g
barash
June 16, 2015, 8:00am
5
Thanks a lot! I’m in the process of converting a large program to OpenACC, and there are a number of parts where the host and accelerator builds produce different results. I’m trying to find the reason for each particular difference, and these examples are taken from that program.
Sorry, -1 as an index was a misprint.
Here is the corrected version of that.
#include<stdio.h>
void fix(double F1[][10][10][20], double F2[][10][10][20])
{
#pragma acc data present( F1[0:10][0:10][0:10][0:20], \
F2[0:10][0:10][0:10][0:20] )
{
#pragma acc region
{
int i,j,k,m ;
#pragma acc loop gang vector collapse(3) independent
for(i=0;i<9;i++) {
for(k=0;k<2;k++) {
for(m=0;m<17;m++) F2[i][9][k][m] = 0. ;
for(m=0;m<17;m++) F2[i][1][k][m] = 0. ;
}
}
}
}
}
void main(){
double f1[10][10][10][20];
double f2[10][10][10][20];
#pragma acc enter data create(f1,f2)
int i,j,k,m;
for(i=0;i<10;i++)for(j=0;j<10;j++)
for(k=0;k<10;k++)for(m=0;m<20;m++)
{ f1[i][j][k][m]=i+j+k+m; f2[i][j][k][m]=i*j*k*m;}
#pragma acc update device(f1,f2)
fix(f1,f2);
#pragma acc update host(f1,f2)
double sum=0;
for(i=0;i<10;i++)for(j=0;j<10;j++)
for(k=0;k<10;k++)for(m=0;m<20;m++)
sum+= (f1[i][j][k][m]+f2[i][j][k][m]);
printf("%f\n",sum);
#pragma acc exit data delete(f1,f2)
}
It works as follows:
$ pgcc -ta=host -o f f.c
$ ./f
17724790.000000
$ pgcc -ta=tesla -o f f.c
$ ./f
17729686.000000
$
Hi Barash,
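The problem here is that “collapse” is only valid on tightly nested loops, but your code is trying to collapse a third level, the inner “m” loops, and there are two of those side by side inside the “k” loop, so the nest is not tightly nested at that level. Granted, we should be issuing an error, so I added TPR#21732 to see if we can catch this case.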
The fix is to only collapse the outer loops by changing “collapse(3)” to “collapse(2)”.
Note that you should probably change the “region” directive to “kernels”. “region” comes from the PGI Accelerator Model; “kernels” is its name in OpenACC. It’s fine to use with PGI, but other compilers won’t accept it.
Hope this helps,
Mat