Problem reading values back from texture Memory

Hi,

I’m trying to write a simple program that binds texture memory. And then read the values back from the texture memory

But when I try to do this the read back values are all different.

Seems like I’m making a mistake with cudaBindTexture.

Please take a look and let me know where I’m going wrong

I have attached the files also

thanks in advance

test_tex.cu

// Utilities and system includes

#include <shrUtils.h>

#include <cutil_inline.h>

#include "tex_test_kernel.cu"

// Allocates an array with random float entries.

void randomInit_float(float* data, int size, float min, float max)

{

	for (int i = 0; i < size; ++i)

 	data[i] = ((max-min)*((float)rand()/RAND_MAX))+ min;

}

// main

int

main( int argc, char** argv) 

{

	int size_a, size_b ; 

	

	// host side variables

	float *h_vector_gold, *h_vector_tex;

	// device side variables 

	float *d_vector_tex ;

	int mem_size_vector;

	size_a = 8;

	size_b = 16 ;

	

	mem_size_vector = size_a * size_b * sizeof(float);

	

	// allocate host side memory

	h_vector_gold = (float*) malloc(mem_size_vector);

	h_vector_tex = (float*) malloc(mem_size_vector);

	

	// allocate device side memory

	cutilSafeCall(cudaMalloc((void**) &d_vector_tex, mem_size_vector));

	

	// set seed for rand()

	srand(1983);

	// loading gold vector with random values

	randomInit_float(h_vector_gold, size_a * size_b , 0.0, 1.0);

	

	cudaBindTexture( NULL, tex_A, h_vector_gold, mem_size_vector);

	dim3 grid(128/8);

	dim3 threads(8);

	

	test_tex<<<grid,threads>>>(d_vector_tex);

	

	cutilSafeCall( cudaMemcpy(h_vector_tex,d_vector_tex, mem_size_vector, cudaMemcpyDeviceToHost) );

	

	

	for(int i=0; i<128;i++)

		printf("\n%d\t%f\t%f", i, h_vector_gold[i], h_vector_tex[i]);

		

	

	// cleanup

	cudaUnbindTexture(tex_A);

	free(h_vector_gold);

	free(h_vector_tex);

	cutilSafeCall(cudaFree(d_vector_tex));

	cudaThreadExit();

	

	return 0;

}

test_tex_kernel.cu

#include <cuda_runtime.h>

#include <cuda.h>

texture<float> tex_A;

__global__ void

test_tex( float* vector)				

{

	

	// Block index

	int bx = blockIdx.x;

	

	

	// Thread index

	int tx = threadIdx.x;

		

	// Index processed by the block

	int index = 8 * bx + tx;

	vector[index] = tex1Dfetch(tex_A,index);

	

}

Makefile.txt (1.91 KB)
tex_test.cu (1.5 KB)
tex_test_kernel.cu (331 Bytes)