Hello,
We can use cudaMalloc3D for 3D arrays, but what about 4D (and more) arrays?
Should I use something like this:
// Host code
// A 4D array of DIM_1 x DIM_2 x DIM_3 x DIM_4 floats is stored in a single 3D
// pitched allocation by folding DIM_3 and DIM_4 together into the depth.
// The extent's width is expressed in bytes, hence the sizeof(float) factor.
cudaExtent extent = make_cudaExtent(DIM_1 * sizeof(float), DIM_2, DIM_3 * DIM_4);
cudaPitchedPtr devPitchedPtr;
// NOTE: cudaMalloc3D returns a cudaError_t; real code should verify it equals
// cudaSuccess before launching a kernel on devPitchedPtr.
cudaMalloc3D(&devPitchedPtr, extent);
// A __global__ function must be launched with an execution configuration.
// The <<<100, 512>>> grid/block sizes are illustrative only.
MyKernel<<<100, 512>>>(devPitchedPtr, DIM_1, DIM_2, DIM_3 * DIM_4);
// Device code
// Walks every float element of a pitched 3D allocation, slice by slice and
// row by row.
//
// Parameters:
//   devPitchedPtr  descriptor of the pitched allocation; its `ptr` is the base
//                  address and its `pitch` is the byte distance between rows.
//   dim_1          number of float elements in each row.
//   dim_2          number of rows in each slice.
//   dim_3          number of slices. When two array dimensions are folded
//                  into this value on the host (e.g. DIM_3 * DIM_4), `z`
//                  enumerates the combined index of those two dimensions.
//
// No thread or block index is used here, so every launched thread performs
// the same full traversal.
__global__ void MyKernel(cudaPitchedPtr devPitchedPtr,
                         int dim_1, int dim_2, int dim_3)
{
    // `ptr` is a void*; C++ requires an explicit cast before byte arithmetic.
    char* devPtr = static_cast<char*>(devPitchedPtr.ptr);
    size_t pitch = devPitchedPtr.pitch;
    // Byte distance between two consecutive slices.
    size_t slicePitch = pitch * dim_2;
    for (int z = 0; z < dim_3; ++z) {
        char* slice = devPtr + z * slicePitch;
        for (int y = 0; y < dim_2; ++y) {
            // Rows are addressed in bytes because the pitch may include padding.
            float* row = (float*)(slice + y * pitch);
            for (int x = 0; x < dim_1; ++x) {
                // Placeholder read showing how element (x, y, z) is reached.
                float element = row[x];
                (void)element;
            }
        }
    }
}
What do you suggest?
Thanks for your reply
Ghislain