Hi all,
I am trying to implement a simple calculation. For this I need to check if my indices reside within the matrix dimensions in case N*blocksizeX != width_Matrix for instance. More precisely, I use the variable gridDim in order to check whether my block has reached the border of the matrix or not.
I was assuming that griDim does not vary within a kernel , but it does:
just call the kernel with the the following dimensions (20x20 = dim(A/S/C))
dim3 grid(5,1,1);
dim3 block(4,32,1);
test_kernel<<<grid,block>>>(input_A,input_S,output_C,20,0,0)
and use the following kernel
__global__ void
test_kernel( float* global_A, float* global_S, float* global_C, \
const int NUM_EL, const int LEFT_EL_X, const int LEFT_EL_Y)
{
const int BLOCK_DIM_X = blockDim.x;
const int BLOCK_DIM_Y = blockDim.y;
const int GRID_DIM_X = gridDim.x;
const int GRID_DIM_Y = gridDim.y;
int thx = threadIdx.x;
int thy = threadIdx.y;
int blx = blockIdx.x;
int bly = blockIdx.y;
int block_index_A = __mul24(__mul24(blx,NUM_EL),BLOCK_DIM_X) + __mul24(bly,BLOCK_DIM_Y);
int thread_index_A = thy + __mul24(thx,NUM_EL);
int block_index_S = __mul24(__mul24(blx,NUM_EL),BLOCK_DIM_X) + __mul24(blx,BLOCK_DIM_X);
int thread_index_S = thx + __mul24(thx,NUM_EL);
if (blx < GRID_DIM_X-1 && bly == GRID_DIM_Y-1) {
if(thy < LEFT_EL_Y )
global_C[block_index_A + thread_index_A] = GRID_DIM_Y;
}
else if(blx == GRID_DIM_X-1 && bly < GRID_DIM_Y-1){
if(thx < LEFT_EL_X)
global_C[block_index_A + thread_index_A] = GRID_DIM_Y;
}
else if(blx == GRID_DIM_X-1 && bly == GRID_DIM_Y-1){
if(thx < LEFT_EL_X && thy < LEFT_EL_Y)
global_C[block_index_A + thread_index_A] = GRID_DIM_Y;
}
else if(blx < GRID_DIM_X-1 && bly < GRID_DIM_Y-1)
global_C[block_index_A + thread_index_A] =GRID_DIM_Y;
}
Hope i provided enough information.
any help would be appreciated!
cem