Hello,
i am recently introducing myself into textures,
i plan on using its cache features to speed up some global memory reads im doing from a triangle index element array.
i did a small kernel for testing, where i read every triangle index and put it on another array, then i print both arrays and compare.
the problem is that the copied array is returning 0, there must be a problem with the texture fetch ??
this is the test kernel
texture<GLuint, 1, cudaReadModeElementType> texRefCheckTexture;
__global__ void kernelCheckTexture(GLuint* eab, GLuint* dest, int eabSize){
int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i<eabSize ){
dest[i] = tex1Dfetch(texRefCheckTexture, i);
}
}
and this is the setup:
void myCudaCheckTexture( GLuint eab, int numTriangles ){
GLuint *eabptr;
GLuint *h_eabptr;
GLuint *d_dest;
GLuint *h_dest;
int blockSize= 128;
h_dest = (GLuint*)malloc(numTriangles*3*sizeof(GLuint) );
cudaMalloc( (void**) &d_dest , numTriangles*3*sizeof(GLuint) );
printf("Kernel::TextureGL:: cudaMalloc d_dest%s\n", cudaGetErrorString(cudaGetLastError()));
//init array
cutilSafeCall(cudaGLMapBufferObject((void**)&eabptr, eab));
printf("Kernel::TextureGL:: mapBufferObject eab: %s\n", cudaGetErrorString(cudaGetLastError()));
h_eabptr = (GLuint*)malloc(numTriangles*3*sizeof(GLuint) );
//!copy eab to host
cudaMemcpy( h_eabptr, eabptr, numTriangles*3*sizeof(GLuint), cudaMemcpyDeviceToHost );
//!init texture
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<GLuint>();
CUDA_SAFE_CALL(cudaBindTexture(0, texRefCheckDelaunay, eabptr, channelDesc, numTriangles*3));
printf("Kernel::Check:: bind Texture: %s\n", cudaGetErrorString(cudaGetLastError()));
dim3 dimBlock(blockSize);
dim3 dimGrid((numTriangles+blockSize) / dimBlock.x);
cudaThreadSynchronize();
//!go kernel
kernelCheckTexture<<<dimGrid, dimBlock>>>(eabptr, d_dest, numTriangles*3);
cudaThreadSynchronize();
//bring back copied array which used texture 1D
cudaMemcpy( h_dest, d_dest, sizeof(GLuint)*3*numTriangles, cudaMemcpyDeviceToHost );
// unmap element array buffer object
cutilSafeCall(cudaGLUnmapBufferObject(eab));
printf("Kernel::TextureGL:: UnmapBufferObject eab: %s\n", cudaGetErrorString(cudaGetLastError()));
//Compare results
for(int i=0; i<numTriangles*3; i++){
printf("h_eabptr[%i] = %i h_dest[%i] = %i\n", i, h_eabptr[i], i, h_dest[i]);
}
cudaFree(d_dest);
free(h_eabptr);
free(h_dest);
//unbind texture
cudaUnbindTexture(texRefCheckDelaunay);
}
when i test with very small triangle array, i get this:
Kernel::TextureGL:: cudaMalloc d_destsetting the device when a process is active is not allowed
Kernel::TextureGL:: mapBufferObject eabno error
Kernel::TextureGL:: bind Texture no error
Kernel::TextureGL:: UnmapBufferObject eabno error
h_eabptr[0] = 3 h_dest[0] = 0
h_eabptr[1] = 1 h_dest[1] = 0
h_eabptr[2] = 2 h_dest[2] = 0
h_eabptr[3] = 0 h_dest[3] = 0
h_eabptr[4] = 1 h_dest[4] = 0
h_eabptr[5] = 3 h_dest[5] = 0
h_eabptr[6] = 2 h_dest[6] = 0
h_eabptr[7] = 5 h_dest[7] = 0
h_eabptr[8] = 3 h_dest[8] = 0
h_eabptr[9] = 4 h_dest[9] = 0
h_eabptr[10] = 5 h_dest[10] = 0
h_eabptr[11] = 2 h_dest[11] = 0
h_eabptr[12] = 3 h_dest[12] = 0
h_eabptr[13] = 5 h_dest[13] = 0
h_eabptr[14] = 6 h_dest[14] = 0
h_eabptr[15] = 3 h_dest[15] = 0
h_eabptr[16] = 6 h_dest[16] = 0
h_eabptr[17] = 0 h_dest[17] = 0
they should be equal,
is the texture binding done right?
edit: if i copy the normal way reading just global memory, it works. I must be making some mistake with the Texture, any help welcome. Im sure solving this basic problem will solve all the crashing on the real project which does something similar