Can anyone explain how to use cudaBindTextureToArray(), where it is applied, and give an example?
Actually, I want to use the read-only memory of the device. Can we use cudaBindTextureToArray() to do that? Can we store a 2D array of roughly 400 to 500 bytes in read-only memory?
Here is a minimal example showing the use of CUDA arrays. For me the tricky part is to remember which widths are specified in elements / texels, and which ones are specified in bytes. Other than that it is pretty straightforward.
Sorry, I am having trouble attaching the code. Let’s try this again:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
// Macro to catch CUDA errors in CUDA runtime calls.
//
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints the
// file name, line number and cudaGetErrorString() text to stderr and ends the
// process with exit(EXIT_FAILURE). The do { } while (0) wrapper makes the
// macro behave as a single statement, so it is safe in an unbraced if/else.
#define CUDA_SAFE_CALL(call)                                                  \
    do {                                                                      \
        /* Distinct local name: a variable called `err` that appears */       \
        /* inside `call` must not be shadowed by this declaration.   */       \
        cudaError_t cudaSafeCallErr_ = (call);                                \
        if (cudaSuccess != cudaSafeCallErr_) {                                \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",    \
                     __FILE__, __LINE__,                                      \
                     cudaGetErrorString(cudaSafeCallErr_) );                  \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)
// Macro to catch CUDA errors in kernel launches.
//
// Place it directly after a kernel launch. It performs two checks and, on
// either failure, prints the file name, line number and error string to
// stderr and ends the process with exit(EXIT_FAILURE):
//   1. cudaGetLastError()      - errors reported before the kernel ran.
//   2. cudaDeviceSynchronize() - blocks the host until the device is idle and
//                                returns errors raised while the kernel ran.
// Because of the blocking synchronize this is meant for debugging and small
// examples rather than for performance-critical paths.
#define CHECK_LAUNCH_ERROR()                                                  \
    do {                                                                      \
        /* Check synchronous errors, i.e. pre-launch */                       \
        cudaError_t launchErr_ = cudaGetLastError();                          \
        if (cudaSuccess != launchErr_) {                                      \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",    \
                     __FILE__, __LINE__, cudaGetErrorString(launchErr_) );    \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
        /* Check asynchronous errors, i.e. kernel failed (ULF).      */       \
        /* cudaDeviceSynchronize() replaces the deprecated           */       \
        /* cudaThreadSynchronize().                                  */       \
        launchErr_ = cudaDeviceSynchronize();                                 \
        if (cudaSuccess != launchErr_) {                                      \
            fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",    \
                     __FILE__, __LINE__, cudaGetErrorString(launchErr_) );    \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)
// File-scope 2D texture reference with float texels. cudaReadModeElementType
// means fetches return the stored float value without conversion.
texture<float, 2, cudaReadModeElementType> tex;

// Row-major indexing helper: res(r,c) expands to res[(r)*n+(c)], so it needs
// a pointer named `res` and a column count named `n` in scope where it is used.
#define res(r,c) res[(r)*n+(c)] // row-major

// Copies the contents of the texture `tex` into a row-major matrix.
//
// Parameters:
//   res - device pointer to an output buffer of at least m * n floats
//   m   - number of rows (height) to copy
//   n   - number of columns (width) to copy; also the row stride of `res`
//
// Launch layout: 2D grid of 2D blocks, x spans columns and y spans rows.
// Each thread handles one element. Threads whose global (row, col) lies
// outside the m x n matrix do nothing, so the grid may overshoot the matrix.
// A single block of n x m threads behaves exactly as before.
__global__ void copyMatrix (float *res, int m, int n)
{
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the tail: without it any extra thread would write past `res`.
    if (row >= m || col >= n) {
        return;
    }

    // The +0.5f offset addresses the center of texel (col, row); this
    // assumes unnormalized texture coordinates (the default for a texture
    // reference). x is the column, y is the row.
    res (row, col) = tex2D (tex, col + 0.5f, row + 0.5f);
}
#define M 5 // rows, height
#define N 4 // columns, width

// Demo driver: uploads an M x N host matrix into a CUDA array, binds the
// array to the texture reference `tex`, runs copyMatrix with one block of
// N x M threads to read it back through the texture, and prints the result.
// Any CUDA failure ends the program via CUDA_SAFE_CALL / CHECK_LAUNCH_ERROR.
int main (void)
{
    // Host-side input; element [r][c] encodes (row+1)(col+1) for easy checking.
    float input_h[M][N] = {{11, 12, 13, 14},
                           {21, 22, 23, 24},
                           {31, 32, 33, 34},
                           {41, 42, 43, 44},
                           {51, 52, 53, 54}};
    float output_h[M][N] = {{0.0f}};
    float *output_d = 0;
    cudaArray *texArray = 0;

    // One block holding one thread per matrix element (x = column, y = row).
    const dim3 grid(1, 1, 1);
    const dim3 block(N, M, 1);

    // Row size in bytes: used both as source pitch and as copy width.
    const size_t rowBytes = N * sizeof(input_h[0][0]);
    const size_t matrixBytes = M * N * sizeof(float);

    // cudaMallocArray takes width and height in elements (texels) ...
    CUDA_SAFE_CALL (cudaMallocArray (&texArray, &tex.channelDesc, N, M));
    // ... whereas cudaMemcpy2DToArray takes pitch and width in bytes and
    // height in rows.
    CUDA_SAFE_CALL (cudaMemcpy2DToArray (texArray, 0, 0, input_h,
                                         rowBytes,  // src pitch
                                         rowBytes,  // width:bytes
                                         M,         // height:rows
                                         cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL (cudaBindTextureToArray (tex, texArray));

    CUDA_SAFE_CALL (cudaMalloc ((void **)&output_d, matrixBytes));

    copyMatrix<<<grid, block>>>(output_d, M, N);
    CHECK_LAUNCH_ERROR();

    CUDA_SAFE_CALL (cudaMemcpy (output_h, output_d, sizeof(output_h),
                                cudaMemcpyDeviceToHost));

    // Print the matrix one row per line, followed by a blank line.
    for (int r = 0; r < M; ++r) {
        const float *rowVals = output_h[r];
        for (int c = 0; c < N; ++c) {
            printf ("%f ", rowVals[c]);
        }
        printf ("\n");
    }
    printf ("\n");

    // Release device resources: output buffer, texture binding, then array.
    CUDA_SAFE_CALL (cudaFree (output_d));
    CUDA_SAFE_CALL (cudaUnbindTexture (tex));
    CUDA_SAFE_CALL (cudaFreeArray (texArray));
    return EXIT_SUCCESS;
}
Thanks