__shared__ arrays working

Hi everybody!

I have some trouble understanding shared arrays.

I have kernel code that looks something like this:

Am I using the __shared__ array correctly? Because it keeps giving me wrong numbers, but when I use a plain " int ref_matrix[7][7]; " in the definition instead, it gives me the right numbers.

Did I misunderstand something? I really appreciate any help. THX

//d_in is a 1-dimensional array which was created from a 2D array

//dimA is the size of d_in

//dimX is the original 2D array’s X dimension size

//dimY is the original 2D array’s Y dimension size

//matrixdim is the dimension of reference matrix

[codebox]global void SomethingCUDAKernel(int *d_out, int *d_in, int dimA, int dimX, int dimY, int matrixdim)

{

int idx = blockIdx.x*blockDim.x + threadIdx.x;

if (idx<dimA)

{

	int column = idx%dimX;		

	int minta_radius = matrixdim/2;

	

	if ( ( column > matrixdim + minta_radius ) && ( column < dimX - matrixdim - minta_radius ) )

	{

		int row = idx/dimX;	

		if ( row > matrixdim+ minta_radius) && ( row < dimY - matrixdim - minta_radius) )

		{

			//storing reference matrix

			__shared__ int ref_matrix[7][7];

			__shared__ int kivonando_matrix[7][7];

			for (int r=0; r<matrixdim; r++){

				for (int s=0; s<matrixdim; s++){

					ref_matrix[r][s] = d_in[(column + minta_radius - r)+(row + minta_radius - s)*dimX];

				}

			}

…[/codebox]

Hi everybody!

I have some trouble understanding shared arrays.

I have kernel code that looks something like this:

Am I using the __shared__ array correctly? Because it keeps giving me wrong numbers, but when I use a plain " int ref_matrix[7][7]; " in the definition instead, it gives me the right numbers.

Did I misunderstand something? I really appreciate any help. THX

//d_in is a 1-dimensional array which was created from a 2D array

//dimA is the size of d_in

//dimX is the original 2D array’s X dimension size

//dimY is the original 2D array’s Y dimension size

//matrixdim is the dimension of reference matrix

[codebox]global void SomethingCUDAKernel(int *d_out, int *d_in, int dimA, int dimX, int dimY, int matrixdim)

{

int idx = blockIdx.x*blockDim.x + threadIdx.x;

if (idx<dimA)

{

	int column = idx%dimX;		

	int minta_radius = matrixdim/2;

	

	if ( ( column > matrixdim + minta_radius ) && ( column < dimX - matrixdim - minta_radius ) )

	{

		int row = idx/dimX;	

		if ( row > matrixdim+ minta_radius) && ( row < dimY - matrixdim - minta_radius) )

		{

			//storing reference matrix

			__shared__ int ref_matrix[7][7];

			__shared__ int kivonando_matrix[7][7];

			for (int r=0; r<matrixdim; r++){

				for (int s=0; s<matrixdim; s++){

					ref_matrix[r][s] = d_in[(column + minta_radius - r)+(row + minta_radius - s)*dimX];

				}

			}

…[/codebox]