Hi I am just at the moment outputting what the threads are finding when looping through a matrix and when I do it I seem to get the elements of the matrix multiple times… I can’t see the problem any ideas?
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define BLOCK_SIZE 4
#define H 4
#define W 4
#define S 16
#define AS(i, j) As[i][j]
__global__ void
matrix(float* N, int w)
{
int blockX = blockIdx.x;
int blockY = blockIdx.y;
int threadX = threadIdx.x;
int threadY = threadIdx.y;
int begin = w * BLOCK_SIZE * blockY;
int end = begin + w - 1;
int step = BLOCK_SIZE;
float n = 0;
int k = 0;
for(int a = begin; a<=end; a+=step)
{
__shared__ float As[BLOCK_SIZE][BLOCK_SIZE];
//AS(ty, tx) = A[a + wA * ty + tx];
AS(threadY,threadX) = N[a + w * threadY + threadX];
__syncthreads();
for(int k = 0; k < BLOCK_SIZE; ++k)
{
//Csub += AS(ty, k) * BS(k, tx);
n += AS(threadY, k);
printf("You found the value%f\n" ,n);
}
__syncthreads();
}
}