Hi,
I need some help about the use of the texture memory and 1D memory texture fetching.
In the small attached example, I create two tables, one that is filled with the threads indexes, and another that is the copy of the previous, after it have been fetched as a texture. Each thread copy one element from the source table to the destination.
And for some reason, the result is a table full of 0.
Any idea ?
Ok, maybe I’m dumb but I don’t manage to attach my code, so here it is:
#include <cutil.h>
#include <cuda.h>
#include <stdio.h>
texture<float, 1, cudaReadModeElementType> tex;
//filling the first table
__global__ void myker(float *t){
t[threadIdx.x] = threadIdx.x;
}
//doing the copy
__global__ void myker2(float *r){
r[threadIdx.x] = tex1Dfetch(tex, threadIdx.x);
}
int main(){
float tab[16];
float *tab_d;
float r[16];
float *r_d;
CUDA_SAFE_CALL( cudaMalloc((void**) &tab_d, 16*sizeof(float)));
CUDA_SAFE_CALL( cudaMalloc((void**) &r_d, 16*sizeof(float)));
dim3 BLOCK(16, 1);
dim3 GRID(1,1);
myker <<< BLOCK, GRID >>> (tab_d);
CUDA_SAFE_CALL( cudaBindTexture( 0, tex, tab_d, 16*sizeof(float)));
//for displaying
CUDA_SAFE_CALL( cudaMemcpy(tab, tab_d, 16*sizeof(float) , cudaMemcpyDeviceToHost));
myker2 <<< BLOCK, GRID >>> (r_d);
CUDA_SAFE_CALL( cudaMemcpy(r, r_d, 16*sizeof(float), cudaMemcpyDeviceToHost));
for(int i = 0; i < 16; i++)
printf("tab[%d]= %f, r[%d] = %f\n", i, tab[i], i, r[i]);
CUDA_SAFE_CALL( cudaUnbindTexture(tex));
CUDA_SAFE_CALL( cudaFree(tab_d));
CUDA_SAFE_CALL( cudaFree(r_d));
return 0;
}