test on 'cudaMallocPitch' and 'cudaMemcpy2D'

#include <stdio.h>
#include <cuda.h>

global void test(int *p, size_t pitch){

*((int *)((char *)p + threadIdx.x * pitch) + threadIdx.y) = 1;

}

#define X 30
#define Y 40

main()
{
int *p, p_h[Y];
size_t pitch;

cudaMallocPitch((void**) &p, &pitch, Y*sizeof(int), X);
dim3 block(X,Y);
test<<<1,block>>>(p, pitch);

cudaMemcpy2D(p_h,Ysizeof(int),p,pitch,Ysizeof(int),X,cudaMemcpyDeviceToHost);
for(int i=0;i<X;i++) for(int j=0;j<Y;j++) printf(“%d\t%d\t%d\t\n”, i, j, p_h[i][j]);
cudaFree(p);
}

from the above very simple code to test cudaMallocPitch and cudaMemcpy2D, the output is coorect p_h[i][j] = 1 for small i and j.
but all p_h[i][j] = 0 for large values of i and j…

any comments or corrections i should make?

i suspect its because i am copying 1d array into 2d array, which is violation of cudaMemcpy2D…

i’d like to make sure of this.

Thanks in well advance.

#include <stdio.h>
#include <cuda.h>

global void test(int *p, size_t pitch){

*((int *)((char *)p + threadIdx.x * pitch) + threadIdx.y) = 1;

}

#define X 30
#define Y 40

main()
{
int *p, p_h[Y];
size_t pitch;

cudaMallocPitch((void**) &p, &pitch, Y*sizeof(int), X);
dim3 block(X,Y);
test<<<1,block>>>(p, pitch);

cudaMemcpy2D(p_h,Ysizeof(int),p,pitch,Ysizeof(int),X,cudaMemcpyDeviceToHost);
for(int i=0;i<X;i++) for(int j=0;j<Y;j++) printf(“%d\t%d\t%d\t\n”, i, j, p_h[i][j]);
cudaFree(p);
}

from the above very simple code to test cudaMallocPitch and cudaMemcpy2D, the output is coorect p_h[i][j] = 1 for small i and j.
but all p_h[i][j] = 0 for large values of i and j…

any comments or corrections i should make?

i suspect its because i am copying 1d array into 2d array, which is violation of cudaMemcpy2D…

i’d like to make sure of this.

Thanks in well advance.