Hi I’m writing a program that’s using parallel gauss-jordan elimination. Here’s a code
[codebox]global static void Gauss_Parallel(float *a,float *x,float *b,int n,const int b_size)
{
shared float m;
int tx = threadIdx.x;
float C:
for (int k=0; k<n-1; k++)
for(int i=k+1; i<n; i++)
{
m=a[i*n+k]/a[k*n+k];
int j=tx;
while(j<n)
{
C=a[i*n+j]-mb[kn+tx];
a[i*n+j]=C;
j+=b_size;
}
if(tx==b_size-1)
b[i]-=m*b[k];
__syncthreads();
}
if(tx==0)
for (int i = n - 1; i >= 0; i --)
{
x[i] = b[i];
for (int j = i + 1; j < n; j ++) x[i] -= a[i*n+j] * x[j];
x[i]/=a[i*n+i];
}
__syncthreads();
}[/codebox]
And I’m getting an cudaErr_enum error in this part
[codebox]
C=a[i*n+j]-mb[kn+tx];
a[i*n+j]=C;
[/codebox]
C is declared as local variable inside a kernel function without any variable qualifiers. I read about cudaErr_enum and i understand it’s the shared memory space fault. But if I assing constant value to C (for example C=2.0) there is no crash during execution. If somebody have any clue what I’m doing wrong I would apprieciate some help.