I cannot get the following code to work. All I get is zeros, i’ve tried everything.
[indent]
global void Set( cudaPitchedPtr* d_c, int width, int height, int N)
{[indent]
char* devPtr = (char*)d_c->ptr;
size_t pitch = d_c->pitch;
size_t slicePitch = pitch * height;
for (int z = 0; z < N; ++z)
{
[indent]char* slice = devPtr + z * slicePitch;
for (int y = 0; y < height; ++y)
{
[indent]float* row = (float*)(slice + y * pitch);
for (int x = 0; x < width; ++x)
{
[indent]row = 5;[/indent]
}[/indent]
}[/indent]
}[/indent]
}
float* CUDA::CallSet(void)
{[indent]
int width = 64;
int height = 64;
int depth = 3;
int strideinbytes = 64 * sizeof(float);
int size = width * height * depth * sizeof(float);;
float* h_A = (float*)malloc(size);
cudaExtent extent = make_cudaExtent(strideinbytes, height, depth);
cudaPitchedPtr devPitchedPtr;
cudaMalloc3D(&devPitchedPtr, extent);
Set<<<100, 512>>>(&devPitchedPtr, width, height, depth);
cudaMemcpy3DParms myParms = {0};
cudaPitchedPtr dst = make_cudaPitchedPtr(h_A, strideinbytes, width, height);
cudaExtent ext = make_cudaExtent(strideinbytes, height, depth);
myParms.srcPtr = devPitchedPtr;
myParms.dstPtr = dst;
myParms.extent = ext;
myParms.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D(&myParms);
//h_A is always all zeros
return h_A;[/indent]
}
[/indent]