here is my code
__global__ void testKer3(float *data,float *result,int *val)
{
int tid=threadIdx.x+blockIdx.x*blockDim.x+threadIdx.y+blockIdx.y*blockDim.y;
if(tid<*val)
{
result[tid]=data[tid];
}
}
int main()
{
int host_totFilesNumber[1];
host_totFilesNumber[0]=5;
int *host_matchingPercentage=new int[host_totFilesNumber[0]];
for(int i=0; i<host_totFilesNumber[0]; i++)
{
host_matchingPercentage[i]=90+i;
}
float *dev_matchingPercentage; /////dev_matchingPercentage[host_totFilesNumber]
cudaMalloc( (void**)&dev_matchingPercentage, host_totFilesNumber[0]*sizeof(float) );
cudaMemcpy(dev_matchingPercentage,host_matchingPercentage, host_totFilesNumber[0]*sizeof(float),cudaMemcpyHostToDevice );
cout<<"dev_matchingPercentage Test\n";
float *dev_matchingPercentage_test;
cudaMalloc((void**)&dev_matchingPercentage_test,host_totFilesNumber[0]*sizeof(float));
int block_size4 = 4;
int n_blocks4 = (host_totFilesNumber[0])/block_size4 + ((host_totFilesNumber[0])%block_size4 == 0 ? 0:1);
int *totalIterations4;
cudaMalloc((void**)&totalIterations4,sizeof(int));
int hst_totIter4[1];
hst_totIter4[0]=host_totFilesNumber[0];
cudaMemcpy(totalIterations4,hst_totIter4, sizeof(int),cudaMemcpyHostToDevice );
testKer3<<<n_blocks4, block_size4>>>(dev_matchingPercentage,dev_matchingPercentage_test,totalIterations4);
float *host_matchingPercentage_result=new float[host_totFilesNumber[0]];
cudaMemcpy(host_matchingPercentage_result,dev_matchingPercentage_test,host_totFilesNumber[0]*sizeof(float),cudaMemcpyDeviceToHost);
cudaFree(totalIterations4);
cudaFree(dev_matchingPercentage_test);
bool testVar4=true;
for(int i=0; i<host_totFilesNumber[0]; i++)
{
if(host_matchingPercentage_result[i]!=host_matchingPercentage[i])
{
cout<<"Copying test Failed\n\n";
testVar4=false;
break;
}
}
if(testVar4)
{
cout<<"Copying test Passed\n\n";
}
cudaFree(dev_matchingPercentage);
}
and output i get is Copying test Failed
any suggestions what is wrong over here i am noob to cuda. Thanks for help mates :)