Window has trigged a breakpoint in,corruption of heap window has trigged a breakpoint in , may due t

I wrote a program to Swap the content of Two Matrice.It gives a right output and also gives a runtime Error (see the attached snapshots).I am using visualBasic2005 win32 Emubug compiler.why does this code give a runtime error? I have attach output shapshot and Runtime Error snapshot herewith.

Following is the code:-

/************************************************************

********

*  TransformofMatrix.cu

*  This is a example of the CUDA program.

************************************************************

*********/

#include <stdio.h>

#include <stdlib.h>

#include <cuda_runtime.h>

#include <cutil.h>

#include<conio.h>

__global__ void TransformofMatrix(float *x,float *y,float *z,int width)

{

		int i=blockIdx.x*blockDim.x+threadIdx.x;

		if(i<width)

		{

		z[i]=x[i];

		x[i]=y[i];

		y[i]=z[i];

		}

		

}

int main()

{

			float *a_h,*b_h,*c_h,*a_d,*b_d,*c_d;

			int width,i;

			width=10;

			size_t size=sizeof(float)*width;

			a_h=(float *)malloc(size);

			cudaMalloc((void**)&a_d,size);

			b_h=(float *)malloc(size);

			cudaMalloc((void**)&b_d,size);

			c_h=(float *)malloc(size);

			cudaMalloc((void**)&c_d,size);

			printf("\nEnter the Elements of First Matrix");

			for(i=0;i<width;i++)

			{

				scanf("%f",&a_h[i]);

			}

			

			printf("\nElements of First Matrix");

			for(i=0;i<width;i++)

			{			

				printf("\n%f",a_h[i]);

			}

			cudaMemcpy(a_d,a_h,size,cudaMemcpyHostToDevice);

			printf("\nEnter the Elements of second  Matrix");

	

			for(i=0;i<width;i++)

			{

				scanf("%f",&b_h[i]);

			}

				printf("\nElements of Second Matrix");

			for(i=0;i<width;i++)

		

			{			

				printf("\n%f",b_h[i]);

			}

			

			cudaMemcpy(b_d,b_h,size,cudaMemcpyHostToDevice);

			for(i=0;i<width;i++)

			{

				c_h[i]=0;			

			}

			cudaMemcpy(c_d,c_h,size,cudaMemcpyHostToDevice);

			int blocksize=4;

			int nblock=width/blocksize+(width%blocksize==0?0:1);

			printf("\nNblock=%d",nblock);

			TransformofMatrix<<<nblock,blocksize>>>(a_d,b_d,c_d,width);

			cudaMemcpy(a_h,a_d,size,cudaMemcpyDeviceToHost);

			printf("\nAfter Swapping Elements of First Matrix");

			for(i=0;i<width;i++)

			{

				printf("\n%f",a_h[i]);

				

			}

			cudaMemcpy(b_h,b_d,size,cudaMemcpyDeviceToHost);

			printf("\nAfter Swapping Elements of second Matrix");

			for(i=0;i<width;i++)

			{

				printf("\n%f",b_h[i]);

				

			}

			

			free(a_h);

			free(b_h);

			free(c_h);

			free(a_d);

			free(b_d);

			free(c_d);

			getch();

			return 0;

}

Thanking you
RunTime_Error.JPG
output_of_program.JPG

you use “free” to deallocate device memory, this is invalid

modify

free(a_d);

			free(b_d);

			free(c_d);

to

cudaFree(a_d); 

			cudaFree(b_d);	

			cudaFree(c_d);

Respected Sir,

                                                           Thanks for your Quick reply.

Thanking you

Deepak bajaj