3D linear memory can't get it to work

I cannot get the following code to work. All I get is zeros, i’ve tried everything.
[indent]
global void Set( cudaPitchedPtr* d_c, int width, int height, int N)
{[indent]
char* devPtr = (char*)d_c->ptr;
size_t pitch = d_c->pitch;
size_t slicePitch = pitch * height;
for (int z = 0; z < N; ++z)
{
[indent]char* slice = devPtr + z * slicePitch;
for (int y = 0; y < height; ++y)
{
[indent]float* row = (float*)(slice + y * pitch);
for (int x = 0; x < width; ++x)
{
[indent]row = 5;[/indent]
}[/indent]
}[/indent]
}[/indent]
}

float* CUDA::CallSet(void)
{[indent]
int width = 64;
int height = 64;
int depth = 3;
int strideinbytes = 64 * sizeof(float);
int size = width * height * depth * sizeof(float);;

float* h_A = (float*)malloc(size);

cudaExtent extent = make_cudaExtent(strideinbytes, height, depth);

cudaPitchedPtr devPitchedPtr;

cudaMalloc3D(&devPitchedPtr, extent);

Set<<<100, 512>>>(&devPitchedPtr, width, height, depth);

cudaMemcpy3DParms myParms = {0};

cudaPitchedPtr dst = make_cudaPitchedPtr(h_A, strideinbytes, width, height);
cudaExtent ext = make_cudaExtent(strideinbytes, height, depth);

myParms.srcPtr = devPitchedPtr;
myParms.dstPtr = dst;
myParms.extent = ext;
myParms.kind = cudaMemcpyDeviceToHost;

cudaMemcpy3D(&myParms);

//h_A is always all zeros
return h_A;[/indent]

}
[/indent]