Textures: Please provide some feedback

caucci · February 24, 2013, 9:33pm

Hello,

For my own future reference, I have put together some simple examples on how to use texture memory in CUDA. I’d like to have feedback from expert users to make sure that I am doing things the right way.

The file below contains multiple examples (covering 1D, 2D, 3D, and cubemap texture binding and fetching as well as their layered variants, where applicable). Please remove the /* */ comment delimiters to access individual examples.

My examples are minimal and not intended to show anything besides how to use textures (for examples, each block contains just one thread… not very efficient). I chose rather odd numbers for the dimensions of my textures (indeed, they are all prime numbers) to avoid lucky situations.

Thanks,
Luca

#include <cuda.h>
#include <stdio.h>

#define DIM_X		23
#define DIM_Y		11
#define DIM_Z		7
#define DIM_FACE	29
#define NUM_LAYERS	5

#define MAP_2D(__dimx, __dimy, __x, __y)		((__y) * (__dimx) + (__x))
#define UNMAP_2D_X(__dimx, __dimy, __index)		((__index) % (__dimx))
#define UNMAP_2D_Y(__dimx, __dimy, __index)		((__index) / (__dimx))

#define MAP_3D(__dimx, __dimy, __dimz, __x, __y, __z)	(((__z) * (__dimy) + (__y)) * (__dimx) + (__x))
#define UNMAP_3D_X(__dimx, __dimy, __dimz, __index)	((__index) % (__dimx))
#define UNMAP_3D_Y(__dimx, __dimy, __dimz, __index)	(((__index) / (__dimx)) % (__dimy))
#define UNMAP_3D_Z(__dimx, __dimy, __dimz, __index)	((__index) / ((__dimx) * (__dimy)))

// Macro used to check CUDA function returning value and error
#define MY_SAFE_CALL(__call) do {										\
  cudaError_t __err = __call;											\
  if(__err != cudaSuccess) {											\
    fprintf(stderr, "CUDA driver error 0x%04X: %s\n", (unsigned int) __err, cudaGetErrorString(__err));		\
    fprintf(stderr, "File: \"%s\", function: \"%s\", line: %d\n", __FILE__, __FUNCTION__, __LINE__);		\
    exit(EXIT_FAILURE);												\
  }														\
} while(0)

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/*
// *** 1D Texture Example ***

texture<float4, cudaTextureType1D, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < DIM_X) {
    test_dev[n] = tex1D(data_tex, n + 0.50f).x;
  }
  return;
}

int main(int argc, char **argv) {
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  float4 data[DIM_X];
  float test[DIM_X];
  float *test_dev;
  float a, b;
  int i, size;

  for(i = 0; i < DIM_X; ++i) {
    data[i] = make_float4(i + 1.00, i + 2.00, i + 3.00, i + 4.00);
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  MY_SAFE_CALL(cudaMallocArray(& data_dev, & desc, DIM_X));
  MY_SAFE_CALL(cudaMemcpyToArray(data_dev, 0, 0, data, sizeof(data), cudaMemcpyHostToDevice));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(i = 0; i < DIM_X; ++i) {
    a = test[i];
    b = data[i].x;
    printf("%d:\t%f\t%f\t%f\n", i, a, b, fabsf(a - b));
  }
  return(0);
}
*/

/*
// *** 1D Layered Texture Example ***

texture<float4, cudaTextureType1DLayered, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < (DIM_X * NUM_LAYERS)) {
    const int n_x = UNMAP_2D_X(DIM_X, NUM_LAYERS, n);
    const int layer = UNMAP_2D_Y(DIM_X, NUM_LAYERS, n);
    test_dev[n] = tex1DLayered(data_tex, n_x + 0.50f, layer).x;
  }
  return;
}

int main(int argc, char **argv) {
  static float4 data[DIM_X * NUM_LAYERS];
  static float test[DIM_X * NUM_LAYERS];
  struct cudaMemcpy3DParms copy_par = {0};
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  float *test_dev;
  int i, layer, n;
  float a, b;
  int size;

  for(i = 0; i < DIM_X; ++i) {
    for(layer = 0; layer < NUM_LAYERS; ++layer) {
      n = MAP_2D(DIM_X, NUM_LAYERS, i, layer);
      data[n] = make_float4(i + 1.00, layer + 1.00, i + layer + 1.00, n);
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  MY_SAFE_CALL(cudaMalloc3DArray(& data_dev, & desc, make_cudaExtent(DIM_X, 0, NUM_LAYERS), cudaArrayLayered));
  copy_par.extent = make_cudaExtent(DIM_X, 1, NUM_LAYERS);
  copy_par.kind = cudaMemcpyHostToDevice;
  copy_par.dstArray = data_dev;
  copy_par.srcPtr = make_cudaPitchedPtr(data, DIM_X * sizeof(*data), DIM_X, 1);
  MY_SAFE_CALL(cudaMemcpy3D(& copy_par));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * NUM_LAYERS * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X * NUM_LAYERS));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * NUM_LAYERS * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(layer = 0; layer < NUM_LAYERS; ++layer) {
    for(i = 0; i < DIM_X; ++i) {
      a = test[MAP_2D(DIM_X, NUM_LAYERS, i, layer)];
      b = data[MAP_2D(DIM_X, NUM_LAYERS, i, layer)].x;
      printf("[%d, %d]\t%10.5f\t%10.5f\t\t%e\n", i, layer, a, b, fabsf(a - b));
    }
  }
  return(0);
}
*/

/*
// *** 2D Texture Example ***

texture<float4, cudaTextureType2D, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < (DIM_X * DIM_Y)) {
    const int n_x = UNMAP_2D_X(DIM_X, DIM_Y, n);
    const int n_y = UNMAP_2D_Y(DIM_X, DIM_Y, n);
    test_dev[n] = tex2D(data_tex, n_x + 0.50f, n_y + 0.50f).x;
  }
  return;
}

int main(int argc, char **argv) {
  struct cudaChannelFormatDesc desc;
  float4 data[DIM_X * DIM_Y];
  float test[DIM_X * DIM_Y];
  float4 *data_dev;
  float *test_dev;
  float a, b;
  int i, j, n;
  size_t pitch;
  int size;

  for(i = 0; i < DIM_X; ++i) {
    for(j = 0; j < DIM_Y; ++j) {
      n = MAP_2D(DIM_X, DIM_Y, i, j);
      data[n] = make_float4(i + 1.00, j + 1.00, i + j + 1.00, n);
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  MY_SAFE_CALL(cudaMallocPitch((void **) (& data_dev), & pitch, DIM_X * sizeof(*data_dev), DIM_Y));
  MY_SAFE_CALL(cudaMemcpy2D(data_dev, pitch, data, DIM_X * sizeof(*data), DIM_X * sizeof(*data), DIM_Y, cudaMemcpyHostToDevice));
  desc = cudaCreateChannelDesc<float4>();
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  data_tex.addressMode[1] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTexture2D(NULL, data_tex, data_dev, desc, DIM_X, DIM_Y, pitch));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * DIM_Y * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X * DIM_Y));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFree(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * DIM_Y * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
    for(j = 0; j < DIM_Y; ++j) {
      for(i = 0; i < DIM_X; ++i) {
      a = test[MAP_2D(DIM_X, DIM_Y, i, j)];
      b = data[MAP_2D(DIM_X, DIM_Y, i, j)].x;
      printf("[%d, %d]\t%10.5f\t%10.5f\t\t%e\n", i, j, a, b, fabsf(a - b));
    }
  }
  return(0);
}
*/

/*
// *** 2D Layered Texture Example ***

texture<float4, cudaTextureType2DLayered, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < DIM_X * DIM_Y * NUM_LAYERS) {
    const int n_x = UNMAP_3D_X(DIM_X, DIM_Y, NUM_LAYERS, n);
    const int n_y = UNMAP_3D_Y(DIM_X, DIM_Y, NUM_LAYERS, n);
    const int layer = UNMAP_3D_Z(DIM_X, DIM_Y, NUM_LAYERS, n);
    test_dev[n] = tex2DLayered(data_tex, n_x + 0.50f, n_y + 0.50f, layer).x;
  }
  return;
}

int main(int argc, char **argv) {
  static float4 data[DIM_X * DIM_Y * NUM_LAYERS];
  static float test[DIM_X * DIM_Y * NUM_LAYERS];
  struct cudaMemcpy3DParms copy_par = {0};
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  struct cudaExtent extent;
  float *test_dev;
  int i, j, layer, n;
  float a, b;
  int size;

  for(i = 0; i < DIM_X; ++i) {
    for(j = 0; j < DIM_Y; ++j) {
      for(layer = 0; layer < NUM_LAYERS; ++layer) {
        n = MAP_3D(DIM_X, DIM_Y, NUM_LAYERS, i, j, layer);
        data[n] = make_float4(i + 1.00, j + 1.00, layer + 1.00, n);
      }
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  extent = make_cudaExtent(DIM_X, DIM_Y, NUM_LAYERS);
  MY_SAFE_CALL(cudaMalloc3DArray(& data_dev, & desc, extent, cudaArrayLayered));
  copy_par.extent = extent;
  copy_par.kind = cudaMemcpyHostToDevice;
  copy_par.dstArray = data_dev;
  copy_par.srcPtr = make_cudaPitchedPtr(data, DIM_X * sizeof(*data), DIM_X, DIM_Y);
  MY_SAFE_CALL(cudaMemcpy3D(& copy_par));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  data_tex.addressMode[1] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * DIM_Y * NUM_LAYERS * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X * DIM_Y * NUM_LAYERS));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * DIM_Y * NUM_LAYERS * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(layer = 0; layer < NUM_LAYERS; ++layer) {
    for(i = 0; i < DIM_X; ++i) {
      for(j = 0; j < DIM_Y; ++j) {
        a = test[MAP_3D(DIM_X, DIM_Y, NUM_LAYERS, i, j, layer)];
        b = data[MAP_3D(DIM_X, DIM_Y, NUM_LAYERS, i, j, layer)].x;
        printf("[%d, %d, %d]\t%10.5f\t%10.5f\t\t%e\n", i, j, layer, a, b, fabsf(a - b));
      }
    }
  }
  return(0);
}
*/

/*
// *** 3D Texture Example ***

texture<float4, cudaTextureType3D, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < DIM_X * DIM_Y * DIM_Z) {
    const int n_x = UNMAP_3D_X(DIM_X, DIM_Y, DIM_Z, n);
    const int n_y = UNMAP_3D_Y(DIM_X, DIM_Y, DIM_Z, n);
    const int n_z = UNMAP_3D_Z(DIM_X, DIM_Y, DIM_Z, n);
    test_dev[n] = tex3D(data_tex, n_x + 0.50f, n_y + 0.50f, n_z + 0.50f).x;
  }
  return;
}

int main(int argc, char **argv) {
  static float4 data[DIM_X * DIM_Y * DIM_Z];
  static float test[DIM_X * DIM_Y * DIM_Z];
  struct cudaMemcpy3DParms copy_par = {0};
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  struct cudaExtent extent;
  float *test_dev;
  int i, j, k, n;
  float a, b;
  int size;

  for(i = 0; i < DIM_X; ++i) {
    for(j = 0; j < DIM_Y; ++j) {
      for(k = 0; k < DIM_Z; ++k) {
        n = MAP_3D(DIM_X, DIM_Y, DIM_Z, i, j, k);
        data[n] = make_float4(i + 1.00, j + 1.00, k + 1.00, n);
      }
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  extent = make_cudaExtent(DIM_X, DIM_Y, DIM_Z);
  MY_SAFE_CALL(cudaMalloc3DArray(& data_dev, & desc, extent));
  copy_par.extent = extent;
  copy_par.kind = cudaMemcpyHostToDevice;
  copy_par.dstArray = data_dev;
  copy_par.srcPtr = make_cudaPitchedPtr(data, DIM_X * sizeof(*data), DIM_X, DIM_Y);
  MY_SAFE_CALL(cudaMemcpy3D(& copy_par));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  data_tex.addressMode[1] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * DIM_Y * DIM_Z * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X * DIM_Y * DIM_Z));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * DIM_Y * DIM_Z * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(i = 0; i < DIM_X; ++i) {
    for(j = 0; j < DIM_Y; ++j) {
      for(k = 0; k < DIM_Z; ++k) {
        a = test[MAP_3D(DIM_X, DIM_Y, DIM_Z, i, j, k)];
        b = data[MAP_3D(DIM_X, DIM_Y, DIM_Z, i, j, k)].x;
        printf("[%d, %d, %d]\t%10.5f\t%10.5f\t\t%e\n", i, j, k, a, b, fabsf(a - b));
      }
    }
  }
  return(0);
}
*/

/*
// *** Cubemap Texture Example ***

texture<float4, cudaTextureTypeCubemap, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = MAP_2D(gridDim.x, gridDim.y, blockIdx.x, blockIdx.y);
  
  if(n < DIM_X * DIM_Y * DIM_Z) {
    const float x = (UNMAP_3D_X(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_X) * 2.00f - 1.00f;
    const float y = (UNMAP_3D_Y(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_Y) * 2.00f - 1.00f;
    const float z = (UNMAP_3D_Z(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_Z) * 2.00f - 1.00f;
    test_dev[n] = texCubemap(data_tex, x, y, z).z;
  }
  return;
}

int main(int argc, char **argv) {
  static float4 data[DIM_FACE * DIM_FACE * 6];
  static float test[DIM_X * DIM_Y * DIM_Z];
  struct cudaMemcpy3DParms copy_par = {0};
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  struct cudaExtent extent;
  float *test_dev;
  int i, j, k, face, n;
  int size;

  for(i = 0; i < DIM_FACE; ++i) {
    for(j = 0; j < DIM_FACE; ++j) {
      for(face = 0; face < 6; ++face) {
        n = MAP_3D(DIM_FACE, DIM_FACE, 6, i, j, face);
        data[n] = make_float4(i + 1.00, j + 1.00, face, n);
      }
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  extent = make_cudaExtent(DIM_FACE, DIM_FACE, 6);
  MY_SAFE_CALL(cudaMalloc3DArray(& data_dev, & desc, extent, cudaArrayCubemap));
  copy_par.extent = extent;
  copy_par.kind = cudaMemcpyHostToDevice;
  copy_par.dstArray = data_dev;
  copy_par.srcPtr = make_cudaPitchedPtr(data, DIM_FACE * sizeof(*data), DIM_FACE, DIM_FACE);
  MY_SAFE_CALL(cudaMemcpy3D(& copy_par));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  data_tex.addressMode[1] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), DIM_X * DIM_Y * DIM_Z * sizeof(*test_dev)));
  size = (int) ceil(sqrtf(DIM_X * DIM_Y * DIM_Z));
  kernel<<<dim3(size, size), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, DIM_X * DIM_Y * DIM_Z * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(i = 0; i < DIM_X; ++i) {
    for(j = 0; j < DIM_Y; ++j) {
      for(k = 0; k < DIM_Z; ++k) {
        n = MAP_3D(DIM_X, DIM_Y, DIM_Z, i, j, k);
        printf("[%d, %d, %d]\t%10.5f\n", i, j, k, test[n]);
      }
    }
  }
  return(0);
}
*/

/*
// *** Cubemap Layered Texture Example ***

texture<float4, cudaTextureTypeCubemapLayered, cudaReadModeElementType> data_tex;

void __global__ kernel(float *test_dev) {
  const int n = blockIdx.x;
  const int layer = blockIdx.y;
  const float x = (UNMAP_3D_X(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_X) * 2.00f - 1.00f;
  const float y = (UNMAP_3D_Y(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_Y) * 2.00f - 1.00f;
  const float z = (UNMAP_3D_Z(DIM_X, DIM_Y, DIM_Z, n) + 0.50f) / ((float) DIM_Z) * 2.00f - 1.00f;
  
  test_dev[MAP_2D(DIM_X * DIM_Y * DIM_Z, NUM_LAYERS, n, layer)] = texCubemapLayered(data_tex, x, y, z, layer).z;
  return;
}

int main(int argc, char **argv) {
  static float4 data[(DIM_FACE * DIM_FACE) * (6 * NUM_LAYERS)];
  static float test[(DIM_X * DIM_Y * DIM_Z) * NUM_LAYERS];
  struct cudaMemcpy3DParms copy_par = {0};
  struct cudaChannelFormatDesc desc;
  struct cudaArray *data_dev;
  struct cudaExtent extent;
  float *test_dev;
  int i, j, k, m, n;
  int face, layer;

  for(layer = 0; layer < NUM_LAYERS; ++layer)  {
    for(face = 0; face < 6; ++face) {
      m = MAP_2D(6, NUM_LAYERS, face, layer);
      for(i = 0; i < DIM_FACE; ++i) {
        for(j = 0; j < DIM_FACE; ++j) {
          n = MAP_2D(DIM_FACE, DIM_FACE, i, j);
          data[MAP_2D(DIM_FACE * DIM_FACE, 6 * NUM_LAYERS, n, m)] = make_float4(i + 1.00, j + 1.00, face + 1.00, layer + 1.00);
        }
      }
    }
  }
  MY_SAFE_CALL(cudaSetDevice(0));
  desc = cudaCreateChannelDesc<float4>();
  extent = make_cudaExtent(DIM_FACE, DIM_FACE, 6 * NUM_LAYERS);
  MY_SAFE_CALL(cudaMalloc3DArray(& data_dev, & desc, extent, cudaArrayCubemap | cudaArrayLayered));
  copy_par.extent = extent;
  copy_par.kind = cudaMemcpyHostToDevice;
  copy_par.dstArray = data_dev;
  copy_par.srcPtr = make_cudaPitchedPtr(data, DIM_FACE * sizeof(*data), DIM_FACE, DIM_FACE);
  MY_SAFE_CALL(cudaMemcpy3D(& copy_par));
  data_tex.normalized = 0;
  data_tex.filterMode = cudaFilterModeLinear;
  data_tex.addressMode[0] = cudaAddressModeBorder;
  data_tex.addressMode[1] = cudaAddressModeBorder;
  MY_SAFE_CALL(cudaBindTextureToArray(data_tex, data_dev, desc));
  MY_SAFE_CALL(cudaMalloc((void **) (& test_dev), (DIM_X * DIM_Y * DIM_Z) * NUM_LAYERS * sizeof(*test_dev)));
  kernel<<<dim3(DIM_X * DIM_Y * DIM_Z, NUM_LAYERS), 1>>>(test_dev);
  MY_SAFE_CALL(cudaUnbindTexture(data_tex));
  MY_SAFE_CALL(cudaFreeArray(data_dev));
  MY_SAFE_CALL(cudaMemcpy(test, test_dev, (DIM_X * DIM_Y * DIM_Z) * NUM_LAYERS * sizeof(*test), cudaMemcpyDeviceToHost));
  MY_SAFE_CALL(cudaFree(test_dev));
  for(layer = 0; layer < NUM_LAYERS; ++layer)  {
    for(i = 0; i < DIM_X; ++i) {
      for(j = 0; j < DIM_Y; ++j) {
        for(k = 0; k < DIM_Z; ++k) {
          n = MAP_3D(DIM_X, DIM_Y, DIM_Z, i, j, k);
          printf("[%d, %d, %d, %d]\t%10.5f\n", layer, i, j, k, test[MAP_2D(DIM_X * DIM_Y * DIM_Z, NUM_LAYERS, n, layer)]);
        }
      }
    }
  }
  return(0);
}
*/

das_j · February 24, 2013, 9:37pm

Is this also in your code?

EDIT: Sorry, it’s everywhere in the code…

caucci · February 24, 2013, 9:40pm

Sorry, I had forgotten to put my code within the “begin code” and “end code” tags.

Topic		Replies	Views
Simplest texture 2D examples CUDA Programming and Performance	11	12111	March 26, 2019
improved texfetch to exploit all of texture hardware CUDA Programming and Performance	21	9105	May 4, 2007
CUDA Texture Memory Example for Beginners CUDA Programming and Performance	6	5899	July 10, 2023
Matlab crashes when I use 3d texture in CUDA code CUDA Programming and Performance	21	11051	November 17, 2010
Output of 2D texture memory is zero CUDA Programming and Performance	9	1197	March 30, 2021
2D Texture access How can I access pixels from 2D texture CUDA Programming and Performance	24	18804	June 5, 2010
Decreased performance when using textures CUDA Programming and Performance	2	521	April 8, 2019
Using Textures CUDA Programming and Performance	10	22047	March 29, 2007
Actual max texture dimension size of Tesla C2050 CUDA Programming and Performance	7	6567	June 16, 2011
Problem with texture memory CUDA Programming and Performance	10	1875	August 6, 2010

Textures: Please provide some feedback

Related topics