Why does my program CRASH ?

Hello, everyone!

I wrote a simple CUDA program for matrix dot product that can be found right under this text.

// includes

#include <cutil_inline.h>

#include <shrUtils.h>

#include <cuda.h>

#define Tile_Width 2

#define Width 5

__global__ void MatrixMul(int *Md, int *Nd, int *Pd){

	int tx=threadIdx.x;

	int ty=threadIdx.y;

	float pom=0;

	for(int k=0;k<Width;k++){

			

		pom+=Md[ty*Width+k]*Nd[k*Width+tx];

		Pd[ty*Width+tx]=pom;

	

	}

}

int main(int argc, char** argv) {

	

		int *Md,*Nd,*Pd;

		int *M,*N,*P;

		int i,j;

		int Amount=Width*Width*sizeof(int);

	// Allocation of memory for CPU based matrix M,N and P

		M = (int*) malloc(sizeof(Amount));

		N = (int*) malloc(sizeof(Amount));

		P = (int*) malloc(sizeof(Amount));

	// Initialization of matrices M and N

		for(i=0;i<Width;i++){

			for(j=0;j<Width;j++){

		

				if(i==j){

					M[j+i*Width]=1;

					N[j+i*Width]=1;

				}

				if(i!=j){

					M[j+i*Width]=0;

					N[j+i*Width]=0;

				

				}

			}

		}

// Allocation of GPU based memory for Md, Nd and Pd matrices

		cudaMalloc((void**)&Md,Amount);

		cudaMemcpy(Md,M,Amount,cudaMemcpyHostToDevice);

		cudaMalloc((void**)&Nd,Amount);

		cudaMemcpy(Nd,N,Amount,cudaMemcpyHostToDevice);

		cudaMalloc((void**)&Pd,Amount);

// Setting up parameteres for kernel invocation

		dim3 dimGrid(1,1,1);

		dim3 dimBlock(Width,Width,1);	

// Kernel invocation

		MatrixMul<<<dimGrid,dimBlock>>>(Md,Nd,Pd);

// Return of results from GPu to CPU

		cudaMemcpy(P,Pd,(Width*Width*sizeof(int)),cudaMemcpyDeviceToHost);

		

		// Printing of results

			for(i=0;i<Width;i++){

				printf("\n");

			for(j=0;j<Width;j++){

		

			printf("[%d] ",P[i*Width+j]);

				}

			}

			printf("\n");

		}

Program crashes only if Width is 5 or more than that, if it’s 4 or less everything is working OK.

Does anyone know why is this happening?

Width is the y dimension of matrix.

Thank you,

I’am using GeForce 9600m GT on Win7 64-bit in VS 2008 with CUDA 3.0

M = (int*) malloc(sizeof(Amount));

heh, need Amount not the size of Amount.

Yes, I corrected that and it helped. Thank you