3D Texturing

Hi,

is there anybody who could help me with some code?
I have got float* and want to make a CUDA-Array with Dimensions dx, dy and dz out of it. Then I want to fetch points in the gridspace by interpolation between the 8 neighbours of every point in space. I tried nearly everything but I always get incompatible types.

Thx in advance :smile:

Here’s some simple code that just uses a kernel to copy data from a 3D texture to gmem array. It shows the steps to setup a 3D texture adn while it doesn’t set up filtering, that would be straightforward addition.

Paulius

#include <stdio.h>

texture<float,3,cudaReadModeElementType> tex_image;

__global__ void tex3D_kernel(float *image, const int dimx, const int dimy, const int dimz)

{

    int tx = blockIdx.x*blockDim.x + threadIdx.x;

    int ty = blockIdx.y*blockDim.y + threadIdx.y;

    int tz = threadIdx.z;

   int idx = tz*dimx*dimy + ty*dimx + tx;

   image[idx] = tex3D( tex_image, tx,ty,tz);

}

void init_data(float *a, const int dimx, const int dimy, const int dimz)

{

    for(int iz=0; iz<dimz; iz++)

        for(int iy=0; iy<dimy; iy++)

            for(int ix=0; ix<dimx; ix++)

            {

                *a = 1000*(iz+1) + 100*(iy+1) +ix;

                ++a;

            }

}

void print_image(float *a, const int dimx, const int dimy, const int dimz)

{

    int idx = 0;

    for(int iz=0; iz<dimz; iz++)

    {

        for(int iy=0; iy<dimy; iy++)

        {

            for(int ix=0; ix<dimx; ix++)

            {

                printf("%d,", (int)a[idx]);

                idx++;

            }

            printf("\n");

        }

        printf("--------------------\n");

    }

}

int main()

{

    int dimx, dimy, dimz;

	

    dimx = 16;

    dimy = 2;

    dimz = 2;

   int num_image_bytes = dimx*dimy*dimz*sizeof(float);

    printf("image:\t%7.2f MB\n", num_image_bytes/(1024.f*1024.f));

    printf("\n");

   ///////////////////////////////////////////////////

    // allocate and initialize memory

    float *h_image=0, *d_image=0;

    h_image = (float*)malloc(num_image_bytes);

    cudaMalloc((void**)&d_image, num_image_bytes);

   if( 0==h_image || 0==d_image )

    {

        printf("couldn't allocate memory\n");

    }

   cudaMemset(d_image, 0, num_image_bytes);

   ////////////////////////////////////////////////////

    // prepare texture

    cudaChannelFormatDesc ca_descriptor;

    cudaExtent ca_extent;

    cudaArray *ca_image=0;

   ca_descriptor = cudaCreateChannelDesc<float>();

    ca_extent.width  = dimx;

    ca_extent.height = dimy;

    ca_extent.depth  = dimz;

    cudaMalloc3DArray( &ca_image, &ca_descriptor, ca_extent );

    cudaBindTextureToArray( tex_image, ca_image, ca_descriptor );

	

    init_data( h_image, dimx,dimy,dimz );

	

    cudaMemcpy3DParms cpy_params = {0};

    cpy_params.extent   = ca_extent;

    cpy_params.kind     = cudaMemcpyHostToDevice;

    cpy_params.dstArray = ca_image;

    cpy_params.srcPtr   = make_cudaPitchedPtr( (void*)h_image, dimx*sizeof(float), dimx, dimy );

   cudaMemcpy3D( &cpy_params );

   ///////////////////////////////////////////////////

   dim3 block(16, 2, dimz);

    dim3 grid( dimx/block.x, dimy/block.y, 1);

    printf("(%d,%d)x(%d,%d,%d)\n", grid.x,grid.y, block.x,block.y,block.z);

   tex3D_kernel<<<grid,block>>>(d_image, dimx, dimy, dimz);

   memset( h_image, 0, num_image_bytes );

    cudaMemcpy( h_image, d_image, num_image_bytes, cudaMemcpyDeviceToHost );

    print_image( h_image, dimx,dimy,dimz );

   printf("CUDA: %s\n", cudaGetErrorString(cudaGetLastError()));

    

    if(d_image)

        cudaFree(d_image);

    if(h_image)

        free(h_image);

   cudaUnbindTexture(tex_image);

    if(ca_image)

        cudaFreeArray(ca_image);

   return 0;

}

Hi,
and thank you very much for this clean code!

I have got one more question to this code.

All the time, I try to launch the kernel, I get:
CUDA: unspecified launch failure

So, I’m wondering how this could be… my array has got a size of 15 MB (GT8800).

Are you sure on this line:

cpy_params.srcPtr = make_cudaPitchedPtr( (void*)h_image, dimx*sizeof(float), dimx, dimy );

shouldn’t it be " dimx*sizeof(float), dimy, dimz " ?

you were right, sorry.

In case of a direct cudaMemcpy3D from HostToDevice, I get an “unspecified launch failure”,
seems like a driver bug.
If I do first a cudaMemcpy to a global memory and then
a cudaMemcpy3D DeviceToDevice it works fine for me?!

Does the code above also fail during kernel execution?

Paulius

So, the code above does not work correctly for you as is? If so, which OS/driver/toolkit are you using?

Paulius