Q 4 NVIDIA developers - CUDA 2.3 - make_cudaPitchedPtr broken?


Using CUDA 2.3.

Trying to create a CUDA pitched pointer:


cudaExtent ves;

ves.width = nx = 2;

ves.height = ny =3;

ves.depth = nz = 4;

cudaMemcpy3DParms copyParams = {0};

copyParams.srcPtr = make_cudaPitchedPtr((void*)&h_volume[0][0][0], sizeof(float), ves.width, ves.height);

fprintf(stdout, “pitch of copyParams.srcPtr is %d\n”, copyParams.srcPtr.pitch);

fprintf(stdout, “copyParams.srcPtr.xsize = %d\n”, copyParams.dstPtr.xsize);

fprintf(stdout, “copyParams.srcPtr.ysize = %d, d_vp.ysize = %d\n”, copyParams.dstPtr.ysize);

copyParams.srcPtr.xsize = ves.width;

copyParams.srcPtr.ysize = ves.height;

fprintf(stdout, “copyParams.srcPtr.xsize = %d\n”, copyParams.srcPtr.xsize);

frintf(stdout, “copyParams.srcPtr.ysize = %d\n”, copyParams.srcPtr.ysize);



pitch of copyParams.srcPtr is 4

copyParams.srcPtr.xsize = 0

copyParams.srcPtr.ysize = 0

copyParams.srcPtr.xsize = 2

copyParams.srcPtr.ysize = 3

So it looks like make_cudaPitchedPtr() either assigns 0s to the xsize and ysize fields of the cudaPitchedPtr structure, or ignores the width and height parameters passed to it.

Am I missing something here, or doing something wrong?

Also, related to this,

When I try to set the xsize and ysize fields of the cudaMemcpy3DParms copyParams.dstPtr structure to be the same as those of srcPtr, I get an error when I call cudaMemcpy3D(): “invalid device pointer”.

It’s OK to make the pitch the same between srcPtr and dstPtr, but making xsize and ysize the same is not.

what am I missing here?

Thanks in advance.

It looks like you’re outputting the wrong values (i.e., the ones from dstPtr instead of srcPtr)? Also the second fprintf() has two %d’s but only one value passed in.

PS: The make_cuda*() functions are actually inline functions defined in driver_functions.h, so you can take a look at the implementation if you want. (It’s very straightforward.)

Thanks. My bad. I was messing around with the code all day long, so I got cross-eyed by the end of the day.

Still, my second question remains:

Why does cudaMemcpy3D give an “invalid device pointer” error when I assign xsize and ysize directly to the dstPtr? Here is a code excerpt:


cudaExtent ves;

ves.width = nx; //2

ves.height = ny; //3

ves.depth = nz; //4

cudaPitchedPtr d_vp;

fprintf(stderr, "pitch of declared d_vp   = %d\n", d_vp.pitch);

//gets a bunch of garbage values assigned, naturally

fprintf(stderr, "pointer of declared d_vp   = %d\n", &d_vp.ptr);

fprintf(stderr, "xsize of declared d_vp   = %d\n", d_vp.xsize);

fprintf(stderr, "ysize of declared d_vp   = %d\n", d_vp.ysize);

cudaMemcpy3DParms copyParams = {0};

copyParams.srcPtr   = make_cudaPitchedPtr((void*)&h_vp[0][0][0], sizeof(float), ves.width, ves.height);

fprintf(stderr, "pitch of copyParams.srcPtr   = make_cudaPitchedPtr((void*)h_vp is %d\n", copyParams.srcPtr.pitch);

fprintf(stderr, "copyParams.srcPtr.xsize =  %d,  d_vp.xsize = %d\n", copyParams.srcPtr.xsize, d_vp.xsize);

fprintf(stderr, "copyParams.srcPtr.ysize =  %d,  d_vp.ysize = %d\n", copyParams.srcPtr.ysize, d_vp.ysize);

copyParams.srcPtr.xsize = ves.width;

copyParams.srcPtr.ysize = ves.height;

fprintf(stderr, "copyParams.srcPtr.xsize =  %d\n",  copyParams.srcPtr.xsize);

fprintf(stderr, "copyParams.srcPtr.ysize =  %d\n",  copyParams.srcPtr.ysize);

//Trying to make destination cudaPitchedPtr to be the same as source except for the memory pointer of course.

d_vp.pitch = copyParams.srcPtr.pitch;

d_vp.xsize = copyParams.srcPtr.xsize;

d_vp.ysize = copyParams.srcPtr.ysize;

copyParams.dstPtr   = d_vp;

fprintf(stderr, "copyParams.dstPtr.xsize =  %d,  d_vp.xsize = %d\n", copyParams.dstPtr.xsize, d_vp.xsize);

fprintf(stderr, "copyParams.dstPtr.ysize =  %d,  d_vp.ysize = %d\n", copyParams.dstPtr.ysize, d_vp.ysize);

cudaMalloc3D(&d_vp, ves);

CheckLastErrorCUDA("cudaMalloc3D d_vp",stderr);

fprintf(stderr, "pitch of copyParams.dstPtr   = d_vp %d\n", copyParams.dstPtr.pitch);

copyParams.extent   = ves;

copyParams.kind     = cudaMemcpyHostToDevice;

fprintf(stderr, "DEBUG:trying to do first - vp cudaMemcpy3D:\n");



fprintf(stderr, "DEBUG:after first call to  cudaMemcpy3D:\n");


CheckLastErrorCUDA("cudaMemcpy3D vp to dev", stderr);



pitch of declared d_vp = 4211666

pointer of declared d_vp = -1836051440

xsize of declared d_vp = 406589440

ysize of declared d_vp = 404410946

pitch of copyParams.srcPtr = make_cudaPitchedPtr((void*)h_vp is 4

copyParams.srcPtr.xsize = 2, d_vp.xsize = 406589440

copyParams.srcPtr.ysize = 3, d_vp.ysize = 404410946

copyParams.srcPtr.xsize = 2

copyParams.srcPtr.ysize = 3

copyParams.srcPtr.xsize = 2, d_vp.xsize = 2

copyParams.srcPtr.ysize = 3, d_vp.ysize = 3

pitch of copyParams.dstPtr = d_vp 4

DEBUG:trying to do first - vp cudaMemcpy3D:

DEBUG:after first call to cudaMemcpy3D:

CUDA error: cudaMemcpy3D vp to dev invalid device pointer.

If only pitch is assigned, that’s fine. But as soon as xsize and ysize are touched, cudaMemcpy3D() fails with the “invalid device pointer” message.

Any thoughts?

I think you’re misunderstanding cudaPitchedPtr - the pitch should be the width of the data in bytes (the size of each row).

cudaMemcpy3DParms copyParams = {0};

copyParams.srcPtr = make_cudaPitchedPtr((void*)&h_vp[0][0][0], ves.width*sizeof(float), ves.width, ves.height);

But are you going to fix the CUDA Programming Guide, which still has a bug in the last listing on page 20? I believe that the extent should be declared as

cudaExtent extent = make_cudaExtent(64 * sizeof(float), 64, 64);

What’s even more confusing is that when an extent is passed to `cudaMalloc3DArray` (as opposed to a plain `cudaMalloc3D`), the width field of the extent is supposed to be in elements, and not bytes.


YDD: Yes, this is fixed already (as of a month or so ago) for CUDA 3.1. See http://forums.nvidia.com/index.php?showtopic=163970 .