When working with an unfamiliar API, some confusion is natural. That said, the CUDA documentation is comprehensive: all the information needed to construct the examples I posted in this thread can be found there.
I have worked with mipmaps before: in OpenGL and OpenGL-ES. Not in CUDA. And I am not familiar with CUDA’s driver API as previously noted, having used the runtime API exclusively for a dozen years. So I am afraid I am unable to provide assistance for this new question. BTW, it would probably be best to post this new issue in a new thread dedicated to it. That makes it easier for future readers to find and reference.
P.S. Here is the previous example modified to use a ‘uchar4’ texture.
#include <stdlib.h>
#include <stdio.h>
// Macro to catch CUDA errors in CUDA runtime calls.
// Evaluates `call` exactly once. If the result is anything other than
// cudaSuccess, prints the file, line and the runtime's error string to stderr
// and terminates the process with EXIT_FAILURE.
// The argument is parenthesized and the status variable carries a
// macro-specific name so that a caller expression mentioning its own `err`
// variable is not shadowed by the macro's local.
#define CUDA_SAFE_CALL(call)                                                \
do {                                                                        \
    cudaError_t cuda_safe_call_status_ = (call);                            \
    if (cudaSuccess != cuda_safe_call_status_) {                            \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",      \
                 __FILE__, __LINE__,                                        \
                 cudaGetErrorString(cuda_safe_call_status_) );              \
        exit(EXIT_FAILURE);                                                 \
    }                                                                       \
} while (0)
// Macro to catch CUDA errors in kernel launches.
// Place it directly after a kernel<<<...>>>() launch. It performs two checks,
// each of which prints file/line/error string to stderr and exits with
// EXIT_FAILURE on failure:
//   1. cudaGetLastError()      - errors reported at launch time
//   2. cudaDeviceSynchronize() - errors raised while the kernel executed
// Note that step 2 blocks the host until the device has finished.
// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
#define CHECK_LAUNCH_ERROR()                                                \
do {                                                                        \
    /* Check synchronous errors, i.e. pre-launch */                         \
    cudaError_t launch_check_status_ = cudaGetLastError();                  \
    if (cudaSuccess != launch_check_status_) {                              \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",      \
                 __FILE__, __LINE__,                                        \
                 cudaGetErrorString(launch_check_status_) );                \
        exit(EXIT_FAILURE);                                                 \
    }                                                                       \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */               \
    launch_check_status_ = cudaDeviceSynchronize();                         \
    if (cudaSuccess != launch_check_status_) {                              \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",      \
                 __FILE__, __LINE__,                                        \
                 cudaGetErrorString(launch_check_status_) );                \
        exit(EXIT_FAILURE);                                                 \
    }                                                                       \
} while (0)
// File-scope 2D texture reference over uchar4 texels. cudaReadModeElementType
// means fetches return the stored uchar4 values unconverted.
// NOTE(review): this is the legacy texture-reference API; newer CUDA toolkits
// have dropped it in favor of texture objects - confirm the target toolkit.
texture<uchar4, cudaTextureType2D, cudaReadModeElementType> tex;
// Debug kernel: prints an m-by-n region of the 2D texture `tex` via device
// printf, one text line per texture row.
//   m - number of rows to read    (height)
//   n - number of columns to read (width)
// Every thread that runs this kernel prints the entire region, so it is meant
// to be launched with a single thread (<<<1,1>>>).
__global__ void kernel (int m, int n)
{
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            // The host sets tex.normalized = false, so coordinates are in
            // texel units; +0.5f addresses the center of texel (col, row).
            const uchar4 val = tex2D (tex, col+0.5f, row+0.5f);
            // unsigned char components are promoted to int in a varargs call;
            // cast explicitly so the arguments match the %u conversions.
            printf ("(%3u %3u %3u %3u) ",
                    (unsigned int)val.x, (unsigned int)val.y,
                    (unsigned int)val.z, (unsigned int)val.w);
        }
        printf ("\n");
    }
}
// Host driver: copies a 4x3 uchar4 image into pitched device memory, binds it
// to the texture reference `tex`, launches `kernel` with a single thread to
// print the texels, then unbinds the texture and frees the device buffer.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if the binding reports a nonzero
// texture offset. Any CUDA error terminates the process through the
// CUDA_SAFE_CALL / CHECK_LAUNCH_ERROR macros.
int main (void)
{
    const int m = 4; // height = #rows
    const int n = 3; // width = #columns
    size_t pitch = 0, tex_ofs = 0;
    // NOTE(review): element [2][0] ends in 72 where the pattern suggests 73;
    // left unchanged so the program output stays as originally published.
    uchar4 arr[4][3]= {{{10,11,12,13},{20,21,22,23},{30,31,32,33}},
                       {{40,41,42,43},{50,51,52,53},{60,61,62,63}},
                       {{70,71,72,72},{80,81,82,83},{90,91,92,93}},
                       {{100,101,102,103},{110,111,112,113},{120,121,122,123}}};
    uchar4 *arr_d = 0;
    // Bytes of payload per row; the host array is tightly packed, the device
    // rows are padded out to `pitch` bytes by cudaMallocPitch.
    const size_t row_bytes = n * sizeof(arr[0][0]);

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d, &pitch, row_bytes, m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, row_bytes,
                                row_bytes, m, cudaMemcpyHostToDevice));

    // Address the texture in texel units rather than in [0,1).
    tex.normalized = false;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs != 0) {
        // The kernel does not compensate for a binding offset, so give up,
        // but release the binding and the allocation first.
        printf ("tex_ofs = %zu\n", tex_ofs);
        CUDA_SAFE_CALL (cudaUnbindTexture (&tex));
        CUDA_SAFE_CALL (cudaFree (arr_d));
        return EXIT_FAILURE;
    }

    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());

    // Release the texture binding and the device buffer before exiting.
    CUDA_SAFE_CALL (cudaUnbindTexture (&tex));
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}
It works exactly as expected. The output is:
reading array straight
( 10 11 12 13) ( 20 21 22 23) ( 30 31 32 33)
( 40 41 42 43) ( 50 51 52 53) ( 60 61 62 63)
( 70 71 72 72) ( 80 81 82 83) ( 90 91 92 93)
(100 101 102 103) (110 111 112 113) (120 121 122 123)