How to use cudaMalloc3DArray to copy dynamic 3d array from host to device?

As the title says.

Ubuntu 10.04

GTX285

CUDA Toolkit 3.2

I have a 512 × 32 × 32 array,

built as a 3D array from C++ Standard Template Library vectors,

and each element is of type double.

define t 512, kx 32, ky 32

vector<vector<vector<double> > > real(t, vector<vector<double> >(kx, vector<double> (ky)));

vector<vector<vector<double> > > dev_real(t, vector<vector<double> >(kx, vector<double> (ky)));

cudaArray *cuArray;

cudaExtent extent = make_cudaExtent(t, kx, ky);

cudaChannelFormatDesc desc = cudaCreateChannelDesc<double>();

cudaMalloc3DArray(&cuArray, &desc, extent);

cudaError status;

//=====HostToDevie=====//

cudaMemcpy3DParms HostToDev = {0};

HostToDev.srcPtr = make_cudaPitchedPtr((void**) &real, sizeof(double)*t, kx, ky);

HostToDev.dstArray = cuArray;

HostToDev.extent = extent;

HostToDev.kind = cudaMemcpyHostToDevice;

status = cudaMemcpy3D(&HostToDev);

if(status != cudaSuccess)

{

  cerr<<"MemcpyHostToDevice: "<<cudaGetErrorString(status)<<endl;

}

//=====DeviceToHost=====//;

cudaMemcpy3DParms DevToHost = {0};

DevToHost.dstPtr = make_cudaPitchedPtr((void**) &dev_real, sizeof(double)*t, kx, ky);

DevToHost.srcArray = cuArray;

DevToHost.extent = extent;

DevToHost.kind = cudaMemcpyDeviceToHost;

status = cudaMemcpy3D(&DevToHost);

if(status != cudaSuccess)

{

  cerr<<"MemcpyDeviceToHost: "<<cudaGetErrorString(status)<<endl;

}

cudaFreeArray(cuArray);

for(int pge=0; pge<t; pge++){

  for(int col=0; col<kx; col++){

    for(int row=0; row<ky; row++){

        cout<<setprecision(16)<<real[row][col][pge]<<endl;

        cout<<setprecision(16)<<dev_real[row][col][pge]<<endl;

    }

  }

}

But cudaGetErrorString(status) outputs:

MemcpyHostToDevice: unspecified driver error

MemcpyDeviceToHost: unspecified driver error

real[0][0][0] = -2.838901364980553e-06

dev_real[0][0][0] = 0

real[1][0][0] = 6.81439681366683e-06

dev_real[1][0][0] = 0

real[0][1][0] = 2.912717275699299e-06

dev_real[0][1][0] = 0

real[1][1][0] = 6.001882060663956e-06

dev_real[1][1][0] = 0

real[0][0][1] = -1.416185500425027e-05

dev_real[0][0][1] = 0

real[1][0][1] = 1.276328002830376e-05

dev_real[1][0][1] = 0

real[0][1][1] = -2.871574757567012e-06

dev_real[0][1][1] = 0

real[1][1][1] = 7.496291361935671e-06

dev_real[1][1][1] = 0

and the cudaGetErrorString(status) outputs
MemcpyHostToDevice: unspecified driver error
MemcpyDeviceToHost: unspecified driver error