I HAVE A CODE AS BELOW;
for (k=0;k<n;k++)
{
temp=k;
for(p=k;p<n;p++)
{
if( fabs(a[p*n+k])> fabs(a[temp*n+k]))
temp=p;
}
for(i=0;i<n;i++)
{
change=a[k*n+i];
a[k*n+i]=a[temp*n+i];
a[temp*n+i]=change;
}
if(a[k*n+k]==0)
return 0;
[color=Red]for(j=k+1;j<n;j++)
{
a[j*n+k]=a[j*n+k]/a[k*n+k];
}
for(j=k+1;j<n;j++)
{
for(i=k+1;i<n;i++)
{
a[j*n+i]=a[j*n+i]-a[j*n+k]*a[k*n+i];
}
}[/color]
}
I want to ACELERATE the red area that i marked,so I oPTiMIzIED the coDE As BelOW:
for (k=0;k<n;k++)
{
temp=k;
for(p=k;p<n;p++)
{
if( fabs(a[p*n+k])> fabs(a[temp*n+k]))
temp=p;
}
for(i=0;i<n;i++)
{
change=a[k*n+i];
a[k*n+i]=a[temp*n+i];
a[temp*n+i]=change;
}
if(a[k*n+k]==0)
return 0;
[color=Red]#pragma acc data copy(a[:N*N])
{
#pragma acc kernels
{
#pragma acc loop independent
for(j=k+1;j<n;j++)
{
a[j*n+k]=a[j*n+k]/a[k*n+k];
}
}
#pragma acc kernels
{
#pragma acc loop independent
for(j=k+1;j<n;j++)
{
#pragma acc loop independent
for(i=k+1;i<n;i++)
{
a[j*n+i]=a[j*n+i]-a[j*n+k]*a[k*n+i];
}
}
}
}[/color]
}
it can be compiled succesfully,but it seemed be not accelerated.i found there should be somthing wrong with #pragma acc data copy(a[:N*N]) it need copy daTA For eaCh k lOOP.i know that ‘updaTE’ COuLD be used for updating data,so how can i use it in my code/[/quote]