Mem_chunk error .

#include <stdlib.h>
#include <stdio.h>
/*
device forceinline int get_global_index(int a,int base)
{
for(int i=0;i<3;i++){
a[i+base]=base;
}
return base;
}
/

global void kernel1(int *array, int result, int row)
{
const int index = blockIdx.x
blockDim.x+threadIdx.x;

			int base =index*3;
			result[index]=base;
			for(int i=0;i<3;i++){
							array[i+base]=base;
				}
					//get_global_index(&array[0],base);  				}

}

int main(void)
{
int *A,*G_A,R,G_R,aa;
cudaError_t err;
A=(int )malloc(3sizeof(int));
R=(int )malloc(3sizeof(int));
cudaMalloc((void **)&G_A,3
sizeof(int));
cudaMalloc((void **)&G_R,3
sizeof(int));
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
scanf(“%d”,&aa);
A[i+j*3]=aa;
}
}

cudaMemcpy(G_A,A,3*sizeof(int),cudaMemcpyHostToDevice);
kernel1<<<1,3>>>(G_A,G_R,3);
cudaDeviceSynchronize();
err=cudaMemcpy(R,G_R,3*sizeof(int),cudaMemcpyDeviceToHost);
printf("CUDA Memcpy G_R : %s\n", cudaGetErrorString(err));
err=cudaMemcpy(A,G_A,3*sizeof(int),cudaMemcpyDeviceToHost);
printf("CUDA Memcpy G_A : %s\n", cudaGetErrorString(err));
for(int i=0;i<3;i++){
		for(int j=0;j<3;j++){
				printf("%d\t",A[i+j*3]);
					}
		printf("\n");
			}


for(int i=0;i<3;i++){
	printf("The answer is %d\n",R[i]);

}
free(R);free(A);
cudaFree(G_A);cudaFree(G_R);
return 0;

}

Can someone please see my code…why I am getting unexpected result all the time…PLease :) Thanks

First thread is working …but remaining is not working

Perhaps you should include proper cuda error checking (hint: google “proper cuda error checking”) and run your code with cuda-memcheck. Either one would give you an idea of errors in your code.

You are indexing out-of-bounds, both in host code and device code, because your A and G_A arrays are not large enough.

Let’s skip all the CUDA stuff for now: your host code is broken.

This line is providing storage for 3 integers in A:

A=(int *)malloc(3*sizeof(int));

These lines are trying to index and store a total of 9 integers in A:

for(int i=0;i<3;i++){
  for(int j=0;j<3;j++){
    scanf("%d",&aa);
    A[i+j*3]=aa;
  }
}

You have problems like that with both A and G_A in host and device code.