_global_void MtxKernel(floatMd, floatNd, floatPd, int Width)
{
int Row = blockIdx.yTILEWIDTH+threadIdx.y;
int Col = blockIdx.x*TILEWIDTH+threadIdx.x;
float Pvalue = 0;
for (int k=0; k<Width; ++k )
Pvalue += Md[RowWidth+k] * Nd[kWidth+Col];
Pd[Row*Width+Col] = Pvalue;
}
Well, this is an example for matrix multiplication.
Now, say I want to get a norm from Pd doing the following;
_global_void MtxKernel(floatMd, floatNd, float*Pd, int Width)
{
fload add;
float Pvalue = 0;
for (int k=0; k<Width; ++k )
Pvalue += Md[RowWidth+k] * Nd[kWidth+Col];
Pd[Row*Width+Col] = Pvalue;
float add = 0;
for (int k=0; k<Width; ++k )
add += Pd[RowWidth+k] * Pd[kWidth+Col];
add = sqrt(add);
}
The way I am doing is correct?
Then next question is how to print this value add?
Please teach me!
Thanks in advance.