Here’s some simple code that just uses a kernel to copy data from a 3D texture to gmem array. It shows the steps to setup a 3D texture adn while it doesn’t set up filtering, that would be straightforward addition.
Paulius
#include <stdio.h>
texture<float,3,cudaReadModeElementType> tex_image;
__global__ void tex3D_kernel(float *image, const int dimx, const int dimy, const int dimz)
{
int tx = blockIdx.x*blockDim.x + threadIdx.x;
int ty = blockIdx.y*blockDim.y + threadIdx.y;
int tz = threadIdx.z;
int idx = tz*dimx*dimy + ty*dimx + tx;
image[idx] = tex3D( tex_image, tx,ty,tz);
}
void init_data(float *a, const int dimx, const int dimy, const int dimz)
{
for(int iz=0; iz<dimz; iz++)
for(int iy=0; iy<dimy; iy++)
for(int ix=0; ix<dimx; ix++)
{
*a = 1000*(iz+1) + 100*(iy+1) +ix;
++a;
}
}
void print_image(float *a, const int dimx, const int dimy, const int dimz)
{
int idx = 0;
for(int iz=0; iz<dimz; iz++)
{
for(int iy=0; iy<dimy; iy++)
{
for(int ix=0; ix<dimx; ix++)
{
printf("%d,", (int)a[idx]);
idx++;
}
printf("\n");
}
printf("--------------------\n");
}
}
int main()
{
int dimx, dimy, dimz;
dimx = 16;
dimy = 2;
dimz = 2;
int num_image_bytes = dimx*dimy*dimz*sizeof(float);
printf("image:\t%7.2f MB\n", num_image_bytes/(1024.f*1024.f));
printf("\n");
///////////////////////////////////////////////////
// allocate and initialize memory
float *h_image=0, *d_image=0;
h_image = (float*)malloc(num_image_bytes);
cudaMalloc((void**)&d_image, num_image_bytes);
if( 0==h_image || 0==d_image )
{
printf("couldn't allocate memory\n");
}
cudaMemset(d_image, 0, num_image_bytes);
////////////////////////////////////////////////////
// prepare texture
cudaChannelFormatDesc ca_descriptor;
cudaExtent ca_extent;
cudaArray *ca_image=0;
ca_descriptor = cudaCreateChannelDesc<float>();
ca_extent.width = dimx;
ca_extent.height = dimy;
ca_extent.depth = dimz;
cudaMalloc3DArray( &ca_image, &ca_descriptor, ca_extent );
cudaBindTextureToArray( tex_image, ca_image, ca_descriptor );
init_data( h_image, dimx,dimy,dimz );
cudaMemcpy3DParms cpy_params = {0};
cpy_params.extent = ca_extent;
cpy_params.kind = cudaMemcpyHostToDevice;
cpy_params.dstArray = ca_image;
cpy_params.srcPtr = make_cudaPitchedPtr( (void*)h_image, dimx*sizeof(float), dimx, dimy );
cudaMemcpy3D( &cpy_params );
///////////////////////////////////////////////////
dim3 block(16, 2, dimz);
dim3 grid( dimx/block.x, dimy/block.y, 1);
printf("(%d,%d)x(%d,%d,%d)\n", grid.x,grid.y, block.x,block.y,block.z);
tex3D_kernel<<<grid,block>>>(d_image, dimx, dimy, dimz);
memset( h_image, 0, num_image_bytes );
cudaMemcpy( h_image, d_image, num_image_bytes, cudaMemcpyDeviceToHost );
print_image( h_image, dimx,dimy,dimz );
printf("CUDA: %s\n", cudaGetErrorString(cudaGetLastError()));
if(d_image)
cudaFree(d_image);
if(h_image)
free(h_image);
cudaUnbindTexture(tex_image);
if(ca_image)
cudaFreeArray(ca_image);
return 0;
}