 # Is there an error in this textbook code?

Hi all,

I am following this sample code from a textbook [Hwu & Kirk].

This is code for tiled matrix multiplication of square matrices whose width is a multiple of TILE_WIDTH.

I have written my doubts as comments below…

// Tiled matrix multiplication kernel: computes P = M * N for square,
// row-major Width x Width matrices, one P element per thread.
//
// Parameters:
//   M, N  - input matrices, indexed as [row * Width + col].
//   P     - output matrix, indexed the same way.
//   Width - matrix dimension; the loop bound Width / TILE_WIDTH only covers
//           the whole matrix when Width is a multiple of TILE_WIDTH.
//
// NOTE(review): assumes the kernel is launched with TILE_WIDTH x TILE_WIDTH
// threads per block and a (Width / TILE_WIDTH) x (Width / TILE_WIDTH) grid;
// there are no bounds checks, so any other configuration is not handled.
__global__ void MatrixMulKernel(float *M, float *N, float *P, int Width)
{
    // Per-block staging tiles for the current phase of M and N.
    __shared__ float Mds[TILE_WIDTH][TILE_WIDTH];
    __shared__ float Nds[TILE_WIDTH][TILE_WIDTH];

    int bx = blockIdx.x;
    int by = blockIdx.y;
    // Thread coordinates within the block; these declarations were missing
    // from the posted listing even though tx/ty are used throughout.
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Identify the row and column of the P element to work on
    int Row = by * TILE_WIDTH + ty;
    int Col = bx * TILE_WIDTH + tx;
    float Pvalue = 0;

    // Loop over the M and N tiles required to compute P element
    for (int ph = 0; ph < Width / TILE_WIDTH; ++ph)
    {
        // Each thread loads one element of the M tile and one of the N tile.
        Mds[ty][tx] = M[Row * Width + ph * TILE_WIDTH + tx];
        // Are we transposing the columns of matrix N to a row in the tile here ?
        // Answer: no. Nds[ty][tx] receives N at row (ph * TILE_WIDTH + ty),
        // column Col, so the tile keeps N's row/column orientation:
        // ty selects the row and tx selects the column, exactly as in N.
        Nds[ty][tx] = N[(ph * TILE_WIDTH + ty) * Width + Col];

        // Wait until every thread in the block has finished writing its
        // tile elements; the loop below reads entries written by others.
        __syncthreads();

        for (int k = 0; k < TILE_WIDTH; ++k)
        {
            // If we have transposed the column to row, then use Nds[tx][k] ?
            // Answer: nothing was transposed, so Nds[k][tx] is correct. It is
            // N at row (ph * TILE_WIDTH + k), column Col, which pairs with
            // Mds[ty][k] = M at row Row, column (ph * TILE_WIDTH + k).
            Pvalue += Mds[ty][k] * Nds[k][tx];
        }

        // Wait until every thread has finished reading this phase's tiles
        // before the next iteration overwrites them.
        __syncthreads();
    }

    P[Row * Width + Col] = Pvalue;
}