cudaMallocPitch

Hi all
I’m tryng tomultiply a scalar for an array. my array has M*N elements and it is allocated with cudamallocpitch.
i think there’s something wrong in my kernel
can anybody help me?

    array_host=(int*)malloc(M*N*sizeof(int));

   cudaMallocPitch((void**)&array_device,&pitch, N*sizeof(int), M);


cudaMemcpy(array_device, array_host, sizeof(int)MN, cudaMemcpyHostToDevice);

    kernel<<<nBlock, nThreadPerBlock>>>(array_device,pitch,M);
    cudaMemcpy(array_host, array_device, M*N*sizeof(int), cudaMemcpyDeviceToHost);

global void kernel(int vet, int pitch, int k)
{
int iy = blockIdx.y
blockDim.y + threadIdx.y;
int ix = blockIdx.xblockDim.x + threadIdx.x;
int
r= (int*)((char*)vet + iy * pitch);
r[ix]=k*r[ix];
}

Hi all
I’m tryng tomultiply a scalar for an array. my array has M*N elements and it is allocated with cudamallocpitch.
i think there’s something wrong in my kernel
can anybody help me?

    array_host=(int*)malloc(M*N*sizeof(int));

   cudaMallocPitch((void**)&array_device,&pitch, N*sizeof(int), M);


cudaMemcpy(array_device, array_host, sizeof(int)MN, cudaMemcpyHostToDevice);

    kernel<<<nBlock, nThreadPerBlock>>>(array_device,pitch,M);
    cudaMemcpy(array_host, array_device, M*N*sizeof(int), cudaMemcpyDeviceToHost);

global void kernel(int vet, int pitch, int k)
{
int iy = blockIdx.y
blockDim.y + threadIdx.y;
int ix = blockIdx.xblockDim.x + threadIdx.x;
int
r= (int*)((char*)vet + iy * pitch);
r[ix]=k*r[ix];
}

Since you use cudaMallocPitch, you should use cudaMemcpy2D which takes the pitch as an extra argument regarding cudaMemcpy

Yves

Since you use cudaMallocPitch, you should use cudaMemcpy2D which takes the pitch as an extra argument regarding cudaMemcpy

Yves