copy cudaMemcpyDeviceToDevice Problem


I’m trying to get

//create arrays on device:

// Handle for the 3D CUDA array; it is filled in by cudaMalloc3DArray below.
cudaArray *d_volume = 0;

// Extent of the 3D array, taken from dimX/dimY/dimZ.
volumeSize = make_cudaExtent(dimX, dimY, dimZ);

// create 3D array

// Channel format describing one uchar per element.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();

CUDA_SAFE_CALL(cudaMalloc3DArray(&d_volume, &channelDesc, volumeSize));

// Source buffer: a plain linear allocation made with cudaMalloc.
// NOTE(review): its size is computed from width/height/depth, while the array
// extent above uses dimX/dimY/dimZ -- confirm that these describe the same volume.
CUDA_SAFE_CALL(cudaMalloc((void**)&tmpVolume, width*height*depth*sizeof(uchar)));

// Zero-fill the linear source buffer.
CUDA_SAFE_CALL(cudaMemset(tmpVolume, 0, width*height*depth*sizeof(uchar)));

//try to copy:

// Copy descriptor, zero-initialised so that unused fields are 0.
cudaMemcpy3DParms copyParams = {0};

// The linear buffer is described as pitched memory whose pitch equals the
// tightly packed row size (volumeSize.width*sizeof(uchar)).
// NOTE(review): the reply further down in this thread reports that allocating
// the source with cudaMallocPitch and passing the pitch it returns avoided the error.
copyParams.srcPtr   = make_cudaPitchedPtr((void*)tmpVolume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);

copyParams.dstArray = d_volume;

copyParams.extent   = volumeSize;

copyParams.kind     = cudaMemcpyDeviceToDevice;

// err holds the result of the copy; the poster reports cudaErrorInvalidValue here.
cudaError err =  cudaMemcpy3D(&copyParams);

to work, but whatever I do, I always get the error cudaErrorInvalidValue.

Does anybody have an idea what the problem is here?




I have exactly the same problem… does anyone know what the problem is here? Using float instead of uchar works but I really need uchar for performance reasons.

Best regards,

Most likely the problem is caused by the way your data is stored in global memory before it is copied into the 3D array. Since a copy into a 3D array requires a pitched pointer, try allocating with cudaMallocPitch() rather than with cudaMalloc(). Then pass the pitch that cudaMallocPitch() returns as the pitch argument when creating the pitched pointer.

Here’s the code that worked for me (I too was getting the error when I used cudaMalloc). Let me know if this fixes your problem.


#include <stdio.h>
#include <stdlib.h>

#define TYPE short

// Fills a densely packed dimx*dimy*dimz volume with a position-dependent test
// pattern so that every voxel can be identified after a copy.
//
//   a     - host buffer with room for at least dimx*dimy*dimz elements,
//           stored x-fastest, then y, then z (no row padding)
//   dimx  - number of elements per row
//   dimy  - number of rows per slice
//   dimz  - number of slices
//
// The value written at (ix, iy, iz) is 1000*(iz+1) + 100*(iy+1) + ix.
void init_data(TYPE *a, const int dimx, const int dimy, const int dimz)
{
    for (int iz = 0; iz < dimz; iz++)
        for (int iy = 0; iy < dimy; iy++)
            for (int ix = 0; ix < dimx; ix++)
            {
                // Address each voxel explicitly; writing through an un-advanced
                // pointer would only ever touch element 0.
                const size_t idx = ((size_t)iz * dimy + iy) * dimx + ix;
                a[idx] = (TYPE)(1000*(iz+1) + 100*(iy+1) + ix);
            }
}
// Prints a diagnostic on stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Demonstrates a device-to-device cudaMemcpy3D from pitched linear memory
// into a 3D CUDA array.
//
// The source buffer is allocated with cudaMallocPitch and every access to it
// (clear, upload, 3D copy) uses the pitch returned by that call.
//
// Returns 0 when all CUDA calls succeeded, 1 otherwise.
int main()
{
    int dimx, dimy, dimz;

    dimx = 16;
    dimy = 2;
    dimz = 2;

    // Sizes are derived from TYPE (the element type actually stored).
    const size_t row_bytes       = dimx * sizeof(TYPE);
    const size_t num_rows        = (size_t)dimy * dimz;
    const size_t num_image_bytes = row_bytes * num_rows;
    printf("image:\t%7.2f MB\n", num_image_bytes/(1024.f*1024.f));

    // allocate and initialize memory
    TYPE *h_image = 0, *d_image = 0;
    cudaArray *ca_image = 0;
    size_t pitch = 0;

    h_image = (TYPE*)malloc(num_image_bytes);
    const cudaError_t alloc_status =
        cudaMallocPitch( (void**)&d_image, &pitch, row_bytes, num_rows );
    printf("pitch = %zu\n", pitch);

    if( 0==h_image || alloc_status != cudaSuccess || 0==d_image )
    {
        printf("couldn't allocate memory\n");
        cudaFree(d_image);   // no-op when d_image is 0
        free(h_image);       // no-op when h_image is 0
        return 1;
    }

    // Rows in d_image are `pitch` bytes apart, so clear it row by row.
    bool ok = cuda_ok( cudaMemset2D(d_image, pitch, 0, row_bytes, num_rows),
                       "cudaMemset2D" );

    // prepare texture
    cudaChannelFormatDesc ca_descriptor = cudaCreateChannelDesc<TYPE>();
    cudaExtent ca_extent = make_cudaExtent(dimx, dimy, dimz);
    ok = ok && cuda_ok( cudaMalloc3DArray( &ca_image, &ca_descriptor, ca_extent ),
                        "cudaMalloc3DArray" );

    init_data( h_image, dimx,dimy,dimz );

    // Host rows are tightly packed (row_bytes apart); device rows are `pitch`
    // bytes apart, so the upload must be a 2D copy.
    ok = ok && cuda_ok( cudaMemcpy2D( d_image, pitch, h_image, row_bytes,
                                      row_bytes, num_rows, cudaMemcpyHostToDevice ),
                        "cudaMemcpy2D" );

    if( ok )
    {
        cudaMemcpy3DParms cpy_params = {0};
        cpy_params.extent   = ca_extent;
        cpy_params.kind     = cudaMemcpyDeviceToDevice;
        cpy_params.dstArray = ca_image;
        // Use the pitch reported by cudaMallocPitch, not dimx*sizeof(TYPE).
        cpy_params.srcPtr   = make_cudaPitchedPtr( (TYPE*)d_image, pitch, dimx, dimy);

        const cudaError_t copy_status = cudaMemcpy3D( &cpy_params );
        printf("d2d memcopy: %s\n", cudaGetErrorString( copy_status ) );
        ok = (copy_status == cudaSuccess);
    }

    // Release everything that was allocated; both CUDA calls accept 0.
    cudaFreeArray(ca_image);
    cudaFree(d_image);
    free(h_image);

    return ok ? 0 : 1;
}
Hello Paulius,

thank you very much, that was the problem. In my opinion, this is not entirely clear in the programming guide. Maybe there could be an additional note in future releases. But for now, everything is back on the road, thanks again!

Best regards,

Thanks a lot!!! It solved my problem, too!

:D :D :D

btw: what is guided mode?