Hello !
I don’t know if my problem is a simple one or if I’m really a noob !
Here is my code :
// Fills every element of an H x W float image with 3.
//
// Launch layout: 2D grid of 2D blocks; x covers columns, y covers rows.
// Threads falling outside the W x H extent do nothing, so the grid may
// overshoot the image.
//
// p     - base device pointer of the pitched allocation
// pitch - distance in BYTES between the starts of two consecutive rows
//         (the value returned by cudaMallocPitch)
// H     - number of rows
// W     - number of float elements per row
__global__ void test(float *p, size_t pitch, int H, int W){
    int idx = threadIdx.x + blockIdx.x * blockDim.x;  // column
    int idy = threadIdx.y + blockIdx.y * blockDim.y;  // row
    if (idx < W && idy < H) {
        // Rows are pitch bytes apart, which may be larger than W*sizeof(float),
        // so the row start must be computed in bytes, not as idy*W elements.
        float *row = (float *)((char *)p + (size_t)idy * pitch);
        row[idx] = 3.0f;
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns 1 when err is not cudaSuccess (after printing `what` and the
// runtime's error string), 0 otherwise.
static int cudaFailed(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) return 0;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 1;
}

// Allocates a pitched H x W float image on the device, launches `test` on it,
// copies the result back to the host and prints every element as an int.
// Returns 0 on success, 1 if any CUDA call fails.
int main()
{
    const int H = 24;   // rows
    const int W = 256;  // float elements per row
    float *p = NULL;
    // Host image laid out as H rows of W floats, matching the row width
    // (W*sizeof(float)) given to cudaMemcpy2D below.
    float p_h[H][W];
    size_t pitch = 0;   // device row stride in bytes, set by cudaMallocPitch

    dim3 threadPerBlock(32, 8);
    // Round up so the whole image is covered even when W or H is not a
    // multiple of the block dimensions; the kernel bounds-checks the overshoot.
    dim3 dimGrid((W + threadPerBlock.x - 1) / threadPerBlock.x,
                 (H + threadPerBlock.y - 1) / threadPerBlock.y);

    if (cudaFailed(cudaMallocPitch((void**) &p, &pitch, W * sizeof(float), H),
                   "cudaMallocPitch"))
        return 1;

    test<<<dimGrid, threadPerBlock>>>(p, pitch, H, W);
    // A kernel launch returns nothing; launch errors are read back explicitly.
    if (cudaFailed(cudaGetLastError(), "kernel launch")) {
        cudaFree(p);
        return 1;
    }

    // Blocking copy: strips the device row padding (pitch) down to densely
    // packed host rows of W floats.
    if (cudaFailed(cudaMemcpy2D(p_h, W * sizeof(float), p, pitch,
                                W * sizeof(float), H, cudaMemcpyDeviceToHost),
                   "cudaMemcpy2D")) {
        cudaFree(p);
        return 1;
    }

    for (int i = 0; i < H; i++)
        for (int j = 0; j < W; j++)
            printf("%d ", (int)p_h[i][j]);

    if (cudaFailed(cudaFree(p), "cudaFree"))
        return 1;
    return 0;
}
And I get the correct results! But now I just replace
by
And when I do so, it compiles, but I get a segmentation fault when the program tries to access p_h[0][0]!
Can anybody tell me why? And how can I deal with it?
Thank you for your time !