Dear all,
the following code i got from Cuda Programming Guide Manual. But it is not showing correct output … do anybody have any clue why the code is acting like this… Code and corrosponding output is as follows … Thanks in advance for any help or advice.
with regards
sam :(
---------------------source code------------------------------------------------------------
/* Cuda GPU Based Program that use GPU processor for finding cosine of numbers */
/* --------------------------- header secton ----------------------------*/
#include<stdio.h>
#include<cuda.h>
#define ACOS_THREAD_CNT 10
#define N 100
/* --------------------------- target code ------------------------------*/
struct acosParams {
float *arg;
float *res;
int n;
};
global void acos_main(struct acosParams parms)
{
int i;
for (i = threadIdx.x; i < parms.n; i += ACOS_THREAD_CNT) {
parms.res[i] = acosf(parms.arg[i] ) ;
}
}
/* --------------------------- host code ------------------------------/
int main (int argc, char argv[])
{
int i = 0;
cudaError_t cudaStat;
float acosRes = 0;
float acosArg = 0;
float* arg = (float ) malloc(Nsizeof(arg[0]));
float* res = (float ) malloc(Nsizeof(res[0]));
struct acosParams funcParams;
/* … fill arguments array ‘arg’ … */
for(i=0; i < N ; i++ ){
arg[i] = (float)i ;
}
cudaStat = cudaMalloc ((void **)&acosArg, N * sizeof(acosArg[0]));
if( cudaStat )
printf(" value = %d : Memory Allocation on GPU Device failed\n", cudaStat);
cudaStat = cudaMalloc ((void **)&acosRes, N * sizeof(acosRes[0]));
if( cudaStat )
printf(" value = %d : Memory Allocation on GPU Device failed\n", cudaStat);
cudaStat = cudaMemcpy (acosArg, arg, N * sizeof(arg[0]), cudaMemcpyHostToDevice);
if( cudaStat )
printf(" Memory Copy from Host to Device failed\n", cudaStat);
funcParams.res = acosRes;
funcParams.arg = acosArg;
funcParams.n = N;
acos_main<<<1,ACOS_THREAD_CNT>>>(funcParams);
cudaStat = cudaMemcpy (res, acosRes, N * sizeof(acosRes[0]), cudaMemcpyDeviceToHost);
if( cudaStat )
printf(" value = %d : Memory Allocation on GPU Device failed\n", cudaStat);
for(i=0; i < N ; i++ ){
if ( i%10 == 0 )
printf("\n acosf(%f) = %f ", arg[i], res[i] );
}
}
-------------------------command used for compilation-----------------------------------------------
$ nvcc cuda-cos-finding.cu -use_fast_math
-------------------------output-----------------------------------------------------------------------------
$./a.out
acosf(0.001000) = 1.569796
acosf(10.001000) = nan
acosf(20.000999) = nan
acosf(30.000999) = nan
acosf(40.000999) = nan
acosf(50.000999) = nan
acosf(60.000999) = nan
acosf(70.000999) = nan
acosf(80.000999) = nan
acosf(90.000999) = nan