Hi all.
After spending a lot of time figuring out how to properly use 3D linear (pitched) memory allocation in CUDA, I finally got it working, so I'm posting my findings.
Most people have problems with pitched sizes and consequently memory corruption:
http://forums.nvidia.com/index.php?showtopic=84371
http://forums.nvidia.com/index.php?showtopic=157812
http://forums.nvidia.com/index.php?showtopic=101833
Those threads are still a good read, though, and I learned a lot from them.
REVIEWED AND WORKING FOR ANY SIZE (including >64K PITCH) (thanks, CapJo)
However, the main point is to properly specify both the cudaExtent and the cudaPitchedPtr structs:
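Allocate device memory:
// CORRECTION: an earlier version of this post said to disregard what everyone says about the width component having the physical size of your datatype, i.e. to write
//   extent.width=x;  // "no need for the =x*sizeof(datatype) as said on all posts"
// That advice was wrong. In conformity with the reference manual, only cudaArrays take the width in elements; everything else (including cudaMalloc3D) needs the physical size in bytes: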
// Describe the volume to allocate. For linear (pitched) memory the width is
// the physical row size in BYTES; height and depth are counts of rows/slices.
cudaExtent extent = make_cudaExtent(x*sizeof(datatype), y, z);
// cudaMalloc3D fills in mem_device.ptr and mem_device.pitch. Always address
// rows through the returned pitch rather than through extent.width.
cudaPitchedPtr mem_device;
cudaError_t allocStatus = cudaMalloc3D(&mem_device, extent);
// Report allocation failures instead of silently continuing with an invalid
// pointer (same reporting style as the memcpy check further down).
if(allocStatus != cudaSuccess){fprintf(stderr, "Malloc3D: %s\n", cudaGetErrorString(allocStatus));}
Set up the copy parameters, copy the host data to the device, and launch the kernel:
// Zero-initialise the parameter struct so every field we do not set
// (source/destination arrays and offsets) is 0.
cudaMemcpy3DParms p = { 0 };
// Host side: the pitch is the row size in bytes. Using extent.width (instead
// of a hard-coded x*sizeof(float3)) keeps the host pitch in agreement with the
// extent being copied; the two are identical when datatype is float3.
// x and y are the logical width and height of the host buffer in elements.
p.srcPtr = make_cudaPitchedPtr((void*)mem_host, extent.width, x, y);
// Device side: use the pitched pointer exactly as returned by cudaMalloc3D.
p.dstPtr = mem_device;
p.extent = extent;
p.kind = cudaMemcpyHostToDevice;
status=cudaMemcpy3D(&p);
if(status != cudaSuccess){fprintf(stderr, "MemcpyHtD: %s\n", cudaGetErrorString(status));}
MyKernel<<<1,1>>>(mem_device,extent);
// A kernel launch returns no status itself; launch failures are reported
// through cudaGetLastError(). A separate variable keeps `status` untouched.
cudaError_t launchStatus = cudaGetLastError();
if(launchStatus != cudaSuccess){fprintf(stderr, "MyKernel launch: %s\n", cudaGetErrorString(launchStatus));}
Access the data inside the kernel in whatever way makes sense for your problem:
// Forward declaration: teste() is defined after this kernel, so it must be
// declared before the call below.
__device__ void teste(float3* row, size_t width);

/*
 * MyKernel: walks every row of a pitched 3D volume of float3 elements and
 * passes each row to teste().
 *
 * devPitchedPtr  pitched allocation; .ptr is the base address and .pitch is
 *                the distance in bytes between consecutive rows.
 * extent         extent.width is the row size in bytes, extent.height the
 *                number of rows per slice, extent.depth the number of slices.
 *
 * Launch layout: a 1D grid of 1D blocks. Rows are distributed over the
 * threads with a grid-stride loop, so any 1D launch size is valid; with
 * <<<1,1>>> a single thread visits the rows slice by slice, row by row.
 */
__global__ void MyKernel(cudaPitchedPtr devPitchedPtr, cudaExtent extent)
{
    char* const base = (char*) devPitchedPtr.ptr;
    const size_t pitch = devPitchedPtr.pitch;

    // extent.width is in bytes; convert it to a count of float3 elements.
    const size_t rowElems = extent.width / sizeof(float3);

    // Slices are laid out back to back with slicePitch = pitch * height, so
    // row j of slice k starts at base + (k * height + j) * pitch. Flattening
    // (k, j) into one row index r = k * height + j visits the same addresses
    // in the same order as nested slice/row loops.
    const size_t rowCount = extent.height * extent.depth;

    // size_t indices match the type of the extent fields and avoid
    // signed/unsigned comparisons and overflow on large volumes.
    const size_t first = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t r = first; r < rowCount; r += stride) {
        float3* row = (float3*)(base + r * pitch);
        // Debug aid:
        // cuPrintf("row:%d pitch:%d\n", (int)r, (int)pitch);
        teste(row, rowElems);
    }
}
/*
 * teste: adds 2 to the x, y and z components of each of the first `width`
 * float3 elements of `row`, printing every updated element with cuPrintf.
 *
 * row    pointer to the first element of one row.
 * width  number of float3 elements in the row (elements, not bytes).
 */
__device__ void teste(float3* row, size_t width){
    // The index is size_t to match `width`: an int index compared against a
    // size_t bound is a signed/unsigned mismatch and overflows on huge rows.
    for(size_t x = 0; x < width; x++){
        row[x].x += 2.0f;
        row[x].y += 2.0f;
        row[x].z += 2.0f;
        cuPrintf("x:%f y:%f z:%f\n",row[x].x,row[x].y,row[x].z);
    }
}
Thanks, CapJo ( http://forums.nvidia.com/index.php?showtop…st&p=990000 )
pitchf.cu (2.46 KB)