cudaMemCpy wrong results

here is my code

__global__ void testKer3(float *data,float *result,int *val)

{

	int tid=threadIdx.x+blockIdx.x*blockDim.x+threadIdx.y+blockIdx.y*blockDim.y;

	if(tid<*val)

	{

		result[tid]=data[tid];

	}

}

int main()

{

	int host_totFilesNumber[1];

	host_totFilesNumber[0]=5;

	int *host_matchingPercentage=new int[host_totFilesNumber[0]];

	for(int i=0; i<host_totFilesNumber[0]; i++)

	{

		host_matchingPercentage[i]=90+i;

	}

	float *dev_matchingPercentage;	/////dev_matchingPercentage[host_totFilesNumber]

	cudaMalloc( (void**)&dev_matchingPercentage, host_totFilesNumber[0]*sizeof(float) );

	cudaMemcpy(dev_matchingPercentage,host_matchingPercentage, host_totFilesNumber[0]*sizeof(float),cudaMemcpyHostToDevice );

	cout<<"dev_matchingPercentage Test\n";

	float *dev_matchingPercentage_test;

	cudaMalloc((void**)&dev_matchingPercentage_test,host_totFilesNumber[0]*sizeof(float));

	int block_size4 = 4;

	int n_blocks4 = (host_totFilesNumber[0])/block_size4 + ((host_totFilesNumber[0])%block_size4 == 0 ? 0:1);

	int *totalIterations4;

	cudaMalloc((void**)&totalIterations4,sizeof(int));

	int hst_totIter4[1];

	hst_totIter4[0]=host_totFilesNumber[0];

	cudaMemcpy(totalIterations4,hst_totIter4, sizeof(int),cudaMemcpyHostToDevice );

	testKer3<<<n_blocks4, block_size4>>>(dev_matchingPercentage,dev_matchingPercentage_test,totalIterations4);

	float *host_matchingPercentage_result=new float[host_totFilesNumber[0]];

	cudaMemcpy(host_matchingPercentage_result,dev_matchingPercentage_test,host_totFilesNumber[0]*sizeof(float),cudaMemcpyDeviceToHost);

	cudaFree(totalIterations4);

	cudaFree(dev_matchingPercentage_test);

	bool testVar4=true;

	for(int i=0; i<host_totFilesNumber[0]; i++)

	{

		if(host_matchingPercentage_result[i]!=host_matchingPercentage[i])

		{

			cout<<"Copying test Failed\n\n";

			testVar4=false;

			break;

		}

	}

	if(testVar4)

	{

		cout<<"Copying test Passed\n\n";

	}

	cudaFree(dev_matchingPercentage);

}

and output i get is Copying test Failed

any suggestions what is wrong over here i am noob to cuda. Thanks for help mates :)

check errors from cuda* functions

tried CudaSafeCall and CudaCheckError(); functions but of not use, after calling kernel when i move back the results to host_matchingPercentage_result array and compare it to orignal value (host_matchingPercentage) i got diferrent results

host_matchingPercentage[0]=90 which i set but i get

host_matchingPercentage_result[0]=1.26117e-043

:(

Hi,

Your host_matchingPercent is an int* and you are threating it as a float*.

The bit pattern for 90 as int is the same value you get for float.

int test[1];

test[0] = 90;

float* tmp;

tmp = (float*)test;

printf("Val: %e",tmp[0]);

This will output ~1.26117e-043