cudaBindTexture failure

Hello! Probably my question is stupid, but I can’t bind a texture to any linear memory (1D allocated with cudaMalloc or 2D allocated with cudaMallocPitch).
Only binding to a CUDA array works correctly.

The linear-memory examples from the 4.0 documentation don’t work for me either. My OS is Ubuntu 10.10, 64-bit.
The code is:

#include <cuda_runtime.h>
#include <cutil_inline.h>

float *devPtr;
size_t size=64*sizeof(float);
CUDA_SAFE_CALL(cudaMalloc((void **) &devPtr, size));

texture<float, cudaTextureType1D, cudaReadModeElementType> texRef;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

cudaError_t err=cudaDeviceSynchronize();
printf("Error before bind: error code=%d (%s)\n",err,cudaGetErrorString(err));
err = cudaBindTexture(NULL, &texRef, devPtr, &channelDesc, size);
printf("Error after bind: error code=%d (%s)\n",err,cudaGetErrorString(err));

Result is:
Error before bind: error code=0 (no error)
Error after bind: error code=18 (invalid texture reference)

If I call cudaBindTexture with the same parameters as in the documentation (just texRef, not &texRef), then the compiler gives an error:
no instance of overloaded function “cudaBindTexture” matches the argument list
argument types are: (long, texture<float, 1, cudaReadModeElementType>, float *, cudaChannelFormatDesc *, size_t)

What is wrong in my code? Any hint is appreciated.
Thanks.

I added a bit of code to your snippet to make it into a complete program, and don’t see any problems. I put the following into a file bindtex.cu and compiled it with nvcc -o bindtex bindtex.cu

#include <stdio.h>

#include <stdlib.h>

/*
 * CUDA_SAFE_CALL(call) -- evaluate a CUDA runtime expression exactly once
 * and terminate the program if it did not return cudaSuccess.
 *
 *   call : any expression yielding a cudaError_t (typically a cuda* API call).
 *
 * On failure the file name, line number and the runtime's error string are
 * written to stderr and the process exits with EXIT_FAILURE.
 *
 * Notes:
 *   - Every line of the definition must end in a backslash with no blank
 *     line in between, otherwise the macro body is cut short.
 *   - The do { ... } while (0) wrapper makes the macro behave like a single
 *     statement, so it is safe inside an unbraced if/else.
 *   - The local is named safeCallErr_ (not err) so that an argument such as
 *     CUDA_SAFE_CALL(err) does not end up initialising the local from itself.
 */
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t safeCallErr_ = (call);                                \
    if (cudaSuccess != safeCallErr_) {                                \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__,                                  \
                 cudaGetErrorString(safeCallErr_) );                  \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

/* Forward declaration of the device kernel launched from main(); it copies
   n texture elements into res. */
__global__ void kernel (float *res, int n);

/* 1D texture reference returning raw float elements. It is declared at file
   scope on purpose: a texture reference cannot be passed as a kernel
   argument, and it must live at top-level file scope to be visible to both
   host and device code. */
texture<float, cudaTextureType1D, cudaReadModeElementType> texRef;

/*
 * Host driver for the texture-binding test.
 *
 * Steps:
 *   1. Allocate a source buffer (devPtr) and a result buffer (res_d) in
 *      device memory and upload the ramp 0, 1, ..., N-1 into devPtr.
 *   2. Bind the file-scope texture reference texRef to devPtr and print the
 *      status before and after the bind.
 *   3. Launch the kernel, which reads every element through the texture and
 *      stores it in res_d.
 *   4. Copy the result back and print it; each res[i] should equal i.
 *
 * Returns EXIT_SUCCESS on success. Exits with EXIT_FAILURE if any checked
 * CUDA call fails or if the texture could not be bound.
 */
int main (void)
{
    enum { N = 64 };                      /* number of float elements */

    float *devPtr = NULL;                 /* device: data behind the texture */
    float *res_d  = NULL;                 /* device: kernel output           */
    float res[N] = {0};                   /* host copy of the kernel output  */
    float src[N];                         /* host ramp uploaded to devPtr    */
    const size_t size = N * sizeof(float);

    /* Generate the ramp instead of spelling it out: src[i] == i, so every
       value fetched through the texture can be checked against its index. */
    for (int i = 0; i < N; i++) {
        src[i] = (float)i;
    }

    CUDA_SAFE_CALL (cudaMalloc((void **) &devPtr, size));
    CUDA_SAFE_CALL (cudaMalloc((void **) &res_d, size));

    /* Every byte set to 0xff reads back as NaN, so any element the kernel
       failed to write is immediately visible in the printout. */
    CUDA_SAFE_CALL (cudaMemset(res_d, 0xff, size)); /* NaN */
    CUDA_SAFE_CALL (cudaMemcpy(devPtr,src,sizeof(src),cudaMemcpyHostToDevice));

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

    /* Report any error left over from the setup above, so that a failure in
       the bind below can be attributed to the bind itself. */
    cudaError_t status = cudaDeviceSynchronize();
    printf("Error before bind: error code=%d (%s)\n",
           (int)status, cudaGetErrorString(status));

    /* Passing &texRef selects the overload that takes a pointer to the
       texture reference; NULL means the byte offset is not requested. */
    status = cudaBindTexture (NULL, &texRef, devPtr, &channelDesc, size);
    printf("Error after bind: error code=%d (%s)\n",
           (int)status, cudaGetErrorString(status));

    if (status != cudaSuccess) {
        /* Fetching through an unbound texture is meaningless, so stop here.
           Cleanup is best effort; its return codes are deliberately ignored
           because we are already on the failure path. */
        cudaFree (devPtr);
        cudaFree (res_d);
        return EXIT_FAILURE;
    }

    kernel<<<1,1>>>(res_d, N);
    /* A kernel launch returns nothing; launch errors must be polled. */
    CUDA_SAFE_CALL (cudaGetLastError());

    /* Blocking copy: also waits for the kernel and reports its errors. */
    CUDA_SAFE_CALL (cudaMemcpy (res,res_d,sizeof(res),cudaMemcpyDeviceToHost));

    for (int i = 0; i < N; i++) {
        printf ("res[%2d] = %8.5f\n", i, res[i]);
    }

    CUDA_SAFE_CALL (cudaUnbindTexture (texRef));
    CUDA_SAFE_CALL (cudaFree (devPtr));
    CUDA_SAFE_CALL (cudaFree (res_d));

    return EXIT_SUCCESS;
}

/*
 * Copy n elements from the file-scope texture reference texRef into res.
 *
 *   res : device pointer to at least n floats; res[i] receives element i
 *         of whatever linear memory texRef is bound to.
 *   n   : number of elements to fetch.
 *
 * Launch layout: any 1D grid of 1D blocks. The loop is grid-stride, so a
 * <<<1,1>>> launch visits every index serially (same result as a plain
 * 0..n-1 loop), while larger launches split the indices among the threads
 * instead of having each thread rewrite the whole array.
 *
 * Uses no shared memory. texRef must already be bound by the host.
 */
__global__ void kernel (float *res, int n)
{
    const int first  = blockIdx.x * blockDim.x + threadIdx.x;
    const int stride = gridDim.x * blockDim.x;

    for (int i = first; i < n; i += stride) {
        /* tex1Dfetch takes an integer element index into the bound
           linear memory. */
        res[i] = tex1Dfetch(texRef, i);
    }
}

The output is as expected:

Error before bind: error code=0 (no error)

Error after bind: error code=0 (no error)

res[ 0] =  0.00000

res[ 1] =  1.00000

[...]

res[62] = 62.00000

res[63] = 63.00000

Thanks a lot! I came to the same conclusion: the texture reference must be declared at file scope, not inside the main function.

Textures cannot be passed as kernel arguments. In order to be visible to both host and device code they must be declared at the top-level file scope.