cudaMemcpy2D memory error (heap) Error with 2D Array, allocating with calloc result in error (heap)

I am trying to work with a 2D Array

my firtst code works with memory from stack

the second code with memory allocated with calloc from heap results with a (bad pointer) error

both codes are equal, just allocation is different

is someone able to explain why this error happens?

[codebox]#include <stdio.h>

#include <cuda.h>

global void test(int *p, size_t pitch)

{

for (int r = 0; r < 5; ++r) { 

    int* row = (int*)((char*)p + r * pitch); 

    for (int c = 0; c < 5; ++c) { 

        row[c] = row[c] + 222; 

    } 

} 

}

main(){

const int size_x = 5; const int size_y = 5;

int *p, p_h[2], i, j;

size_t pitch;

for(i=0;i<size_y;i++){

	for(j=0;j<size_x;j++){

	p_h[1][i][j] = 2;

	}

}

cudaMallocPitch((void**) &p, &pitch, size_x*sizeof(int), size_y);

cudaMemcpy2D(p,pitch,p_h[1],size_x*sizeof(int),size_x*sizeof

(int),size_y ,cudaMemcpyHostToDevice);

test<<<1,1>>>(p, pitch);

cudaMemcpy2D(p_h[1],size_x*sizeof(int),p,pitch,size_x*sizeof

(int),size_y,cudaMemcpyDeviceToHost);

for(i=0;i<size_y;i++){

	for(j=0;j<size_x;j++){

	printf("%d\t%d \n",i*5+j, p_h[1][i][j]);

	}

}

system("PAUSE");

}[/codebox]

[codebox]#include <stdio.h>

#include <cuda.h>

global void test(unsigned char *p, size_t pitch)

{

for (int r = 0; r < 5; ++r) { 

	unsigned char* row = (unsigned char*)((char*)p + r * pitch); 

	for (int c = 0; c < 5; ++c) { 

		row[c] = row[c] + 222; 

	} 

} 

}

main(){

const int size_x = 5; const int size_y = 5;

unsigned char **p_h = (unsigned char**)(calloc(size_y,sizeof(unsigned char*)));

for (int j = 0; j < size_y; j++){

	p_h[j] = (unsigned char*)(calloc(size_x,sizeof(unsigned char*)));	

}

unsigned char *p;

size_t pitch;

for(int i=0;i<size_y;i++){

	for(int j=0;j<size_x;j++){

	p_h[i][j] = 2;

	printf("%d\t%d \n",i*5+j, p_h[i][j]);

	}

}

cudaMallocPitch((void**) &p, &pitch, size_x*sizeof(unsigned char), size_y);

cudaMemcpy2D(p,pitch,p_h,size_x*sizeof(unsigned char),size_x*sizeof(unsigned char),size_y ,cudaMemcpyHostToDevice);

test<<<1,1>>>(p, pitch);

cudaMemcpy2D(p_h,size_x*sizeof(unsigned char),p,pitch,size_x*sizeof(unsigned char),size_y,cudaMemcpyDeviceToHost);

//Output

for(int i=0;i<size_y;i++){

	for(int j=0;j<size_x;j++){

	printf("%d\t%d \n",i*5+j, p_h[i][j]);

	}

}

system("PAUSE");

}[/codebox]

sorry wrong forum
please delete this topic