after running the following code (which initializes a 1d array of size 16 floats to the threadid, then copies that bcak to host, then to a cuda array which is then bound to a texture, which is then called in a kernel to double the vlaues by thread), I get the following error message;
“cannot fetch from a texture that is not bound”
So even though the code contains the statement
cudaBindTextureToArray(tex_rho,cu_rho,channelDesc);
which is made after the new rho on the host ahas been copied to the cuda array, why is the texture tex_rho not bound to the cuda array cu_rho which should contain the initialized array?
#include <math.h>
#include <stdio.h>
#include <cutil.h>
#define NUMTHREADS 16
#define NUMBLOCKS 1
#define NTOTAL NUMBLOCKS*NUMTHREADS
__global__ void sumdensity(float* d_rho);
__global__ void kernel1(float* d_rho);
void allocateArray(void **devPtr,size_t size);
void freeArray(void *devPtr);
texture<float, 1, cudaReadModeElementType> tex_rho;
FILE* outfile;
int main(int argc, char** argv)
{
int i;
//host variables
float h_rho1[NTOTAL];
float h_rho2[NTOTAL];
int size_rho;
//device variables
float* d_rho1;
float* d_rho2;
//CUDA arrays
cudaArray* cu_rho;
cudaSetDevice(1);
CUT_DEVICE_INIT(argc, argv);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(64, 0, 0, 0, cudaChannelFormatKindFloat);
CUDA_SAFE_CALL( cudaMallocArray( &cu_rho, &channelDesc, NTOTAL, 1 ));
size_rho = NTOTAL*sizeof(float);
allocateArray((void**)&d_rho1,size_rho);
allocateArray((void**)&d_rho2,size_rho);
//set up texture for new rho
sumdensity<<<NUMBLOCKS,NUMTHREADS>>>(d_rho1); //write new rho into d_rho
cudaMemcpy(h_rho1,d_rho1,size_rho,cudaMemcpyDeviceToHost); //get d_rho back from device into h_rho
CUDA_SAFE_CALL( cudaMemcpyToArray( cu_rho, 0, 0, h_rho1, size_rho, cudaMemcpyHostToDevice)); //write h_rho into CUDA array cu_rho
// set texture parameters?
printf("\nstart binding...");
cudaBindTextureToArray(tex_rho,cu_rho,channelDesc); //bind cu_rho to tex_rho, kernel1 should now refer to tex_rho, not rho
//end set up of new rho as texture
printf("\nend binding...");
kernel1<<<NUMBLOCKS,NUMTHREADS>>>(d_rho2);
CUT_CHECK_ERROR("kernel error");
printf("\n\n%s\n", cudaGetErrorString(cudaGetLastError()));
cudaMemcpy(h_rho2,d_rho2,size_rho,cudaMemcpyDeviceToHost);
for(i=0; i<NTOTAL; i++) printf("\n%i\t%.5f",i,h_rho2[i]);
freeArray(d_rho1);
freeArray(d_rho2);
cudaFreeArray(cu_rho);
CUT_EXIT(argc, argv);
return 0;
}
/////////////////////
__global__ void sumdensity(float* d_rho)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
d_rho[i] = i;
}
///////////////////
__global__ void kernel1(float* d_rho)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float rho = tex1D(tex_rho,i);
rho = 2*rho;
d_rho[i] = rho;
//fprintf(outfile,"\n%i\t%.5f",i,rho);
}
///////////////////////////////
void allocateArray(void **devPtr,size_t size)
{
cudaMalloc(devPtr,size);
}
//////////////////////////////
void freeArray(void *devPtr)
{
cudaFree(devPtr);
}
//////////////////////////////