Copying to a 3D CUDA array: cudaMemcpyToArray returns cudaErrorInvalidValue

Hi all,

When I try to copy memory from the host to a 3D array, I get a cudaErrorInvalidValue. The code snippet is as follows:

// Fragment: allocate a 3D CUDA array of float elements and try to fill it from
// host memory with cudaMemcpyToArray. Z_d, Z, C, ZM, ZN, T and CUDA_SAFE are
// declared outside this fragment.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); // texture elements are float

  // dimensions of texture 

  cudaExtent extent;

  extent.width = C; // 51

  extent.height = ZM; // 3

  extent.depth = ZN; // 3

CUDA_SAFE( cudaMalloc3DArray(&Z_d, &channelDesc, extent) ); // allocate GPU memory as a 3D cuda-array -> success

  // NOTE(review): the byte count uses sizeof(T) while the channel descriptor is
  // float; presumably T is float here -- confirm against the enclosing code.
  cudaError_t errCode = cudaMemcpyToArray(Z_d, 0, 0, Z, C*ZM*ZN*sizeof(T), cudaMemcpyHostToDevice); // --> cudaErrorInvalidValue

My feeling is that I should be using a function like cudaMemcpyToArray3D, but it doesn’t exist.

Any advice would be greatly appreciated!



Hi, I tried to reduce my test case as much as possible…

When run, it prints: Call to ‘cudaMemcpyToArray’ returned ‘invalid argument’

Any idea how I can copy memory to a 3d array?

Thanks a lot,



// compile with "nvcc"


// this example shows that copying memory to a 3D texture fails

#include <cuda.h>

#include <stdio.h>

#include ""

// Reports the outcome of a CUDA runtime call and terminates the program if the
// call failed.
//   call  - status code returned by the CUDA runtime call
//   fname - name of the call; used only in the printed message
// The status line is printed for every call, including successful ones.
void safecall(cudaError_t call, const char* fname) {

  printf("Call to '%s' returned '%s'\n", fname, cudaGetErrorString(call));

  // exit with a non-zero status so the failure is visible to the shell / caller
  if (call != cudaSuccess) exit(1);

}


// Minimal reproduction: allocates a 3D CUDA array of floats with extent
// (C, M, N), then tries to fill it from host memory with cudaMemcpyToArray.
// That copy is the call under test; this thread reports that it returns
// 'invalid argument', in which case safecall() terminates the program there.
// The statements after the copy only run if the copy succeeds.
int main() {

  const int N = 3, M = 3, C = 5; // size of the texture: MxNxC

// grid and block dimensions

  dim3 gridSize(C, 1);

  dim3 blockSize(8, 8);

// allocate memory on host (zero-initialised so the copy never reads
// indeterminate values)

  float Z[N*M*C] = {0};

// pointer to texture memory on device

  cudaArray *Z_d;

// setup texture dimension

  cudaExtent extent;

  extent.width = C;

  extent.height = M;

  extent.depth = N;

// texture channel descriptor

  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); 

// memory allocation for texture, on GPU

  safecall(cudaMalloc3DArray(&Z_d, &channelDesc, extent), "cudaMalloc3DArray"); // allocate GPU memory as a 3D cuda-array  

// copy from host memory to device memory (the call this example is about)

  safecall(cudaMemcpyToArray(Z_d, 0, 0, Z, C*M*N*sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpyToArray" );

// bind memory to the texture

  safecall(cudaBindTextureToArray(tex, Z_d, channelDesc), "cudaBindTextureToArray" );

// dummy kernel call

  test_kernel<<<gridSize, blockSize>>>();

// a kernel launch returns no status itself; fetch launch errors explicitly

  safecall(cudaGetLastError(), "test_kernel launch");

// release the texture binding and the device array

  safecall(cudaUnbindTexture(tex), "cudaUnbindTexture");

  safecall(cudaFreeArray(Z_d), "cudaFreeArray");

  return 0;

}


// 3D texture reference over float elements (cudaReadModeElementType);
// main() binds the cudaArray Z_d to it

static texture<float, 3, cudaReadModeElementType> tex;

// placeholder kernel: takes no arguments and does no work

__global__ void test_kernel()
{
}

In case it’s useful to someone I solved my problem (a simple example is in the SDK in ‘simpleTexture3D’)…

In summary, instead of using cudaMemcpyToArray, one has to do something like this:

// copy from host memory to device memory

  cudaMemcpy3DParms copyParams = {0};

  // The pitched pointer has to describe the host buffer with the same layout
  // as 'extent' (width = C, height = M, depth = N): row pitch in bytes, then
  // row width and slice height in elements. The version originally posted
  // here passed (M*sizeof(float), M, N), which does not match the extent.
  copyParams.srcPtr   = make_cudaPitchedPtr((void*)Z, C*sizeof(float), C, M);

  copyParams.dstArray = Z_d;

  copyParams.extent   = extent;

  copyParams.kind     = cudaMemcpyHostToDevice;

//safecall(cudaMemcpyToArray(Z_d, 0, 0, Z, C*M*N*sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpyToArray" );

  safecall( cudaMemcpy3D(&copyParams), "cudaMemcpy3D" );

Maybe this should be clarified in the documentation?



Thank you.

I was also having this problem and your solution worked.

Will someone at NVIDIA please note this and fix the documentation? 3D textures
badly need some working examples in the docs. Thank you.


There is something strange in this code. In the extent, you define the order of the coordinates as C, M, N,
but when creating the pitched pointer, M and N are the first coordinates.