Improve my first program (computing pi)

Hi, I'm using the Wallis product to compute pi.
This is my code:


#include <cstdio>
#include <cstdlib>

// For the CUDA runtime routines (prefixed with “cuda_”)
#include <cuda_runtime.h>

  /**
   * CUDA kernel: writes one factor of the Wallis product into each slot of a.
   *
   * The Wallis product states  pi / 2 = prod_{i >= 1} 4*i*i / (4*i*i - 1),
   * so thread i stores a[i] = 4*i*i / (4*i*i - 1).
   *
   * Launch layout: 1D grid of 1D blocks; any configuration whose total thread
   * count is >= num works, surplus threads are masked by the bounds check.
   * No shared memory is used.
   *
   * @param a    device pointer to at least num floats (output only)
   * @param num  number of elements to fill
   *
   * Note: index 0 is not a Wallis factor (the formula evaluates to zero
   * there), so callers must start their product at index 1.
   */
  __global__ void pi(float *a, int num)
  {
      int i = blockIdx.x * blockDim.x + threadIdx.x;

      if (i < num)
      {
          // Evaluate 4*i*i in float: the int expression overflows once
          // i exceeds 23170, whereas the float form merely rounds.
          float fourISq = 4.0f * (float)i * (float)i;
          a[i] = fourISq / (fourISq - 1.0f);
      }
  }



/**
 * Host entry point: estimates pi with the Wallis product
 *   pi / 2 = prod_{i >= 1} 4*i*i / (4*i*i - 1).
 *
 * The pi kernel fills a device array with the individual factors, the array
 * is copied back, every factor is printed, and the host multiplies factors
 * 1 .. num-1 together and prints twice that product.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA
 * error.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Slots 1 .. num-1 hold the factors that enter the product; slot 0 is
    // filled by the kernel but never read on the host.
    int num = 50;
    size_t size = num * sizeof(float);

    // Allocate the host result vector a
    float *h_a = (float *)malloc(size);
    if (h_a == NULL)
    {
        fprintf(stderr, "Failed to allocate host vector a!\n");
        exit(EXIT_FAILURE);
    }

    // Allocate the device result vector a. No host-to-device copy is needed:
    // the kernel overwrites every element, so any input would be discarded.
    float *d_a = NULL;
    err = cudaMalloc((void **)&d_a, size);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Fixed block size with a ceil-div grid, so num may exceed the maximum
    // number of threads a single block can hold.
    int threadsPerBlock = 256;
    int blocksPerGrid = (num + threadsPerBlock - 1) / threadsPerBlock;

    pi<<<blocksPerGrid, threadsPerBlock>>>(d_a, num);

    // A kernel launch returns nothing; configuration errors surface here.
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to launch pi kernel (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Copy the device result vector in device memory to the host result vector
    // in host memory. This blocking copy also waits for the kernel to finish.
    printf("Copy output data from the CUDA device to the host memory\n");
    err = cudaMemcpy(h_a, d_a, size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy vector a from device to host (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Free device memory
    err = cudaFree(d_a);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to free device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Reset the device
    err = cudaDeviceReset();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to deinitialize the device! error=%s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Print the individual Wallis factors
    for (int i = 1; i < num; i++)
    {
        printf("%f \n", h_a[i]);
    }

    // Multiply the factors together: the product converges to pi / 2
    float f = 1;
    for (int i = 1; i < num; i++)
    {
        f *= h_a[i];
    }

    printf("pi est égal %f", 2 * f);

    // Free host memory
    free(h_a);

    return 0;
}

Please help me to improve my program
Thank you

How do you compute pi? With a[i] = (float)(4*i*i) / (4*i*i - 1); ???

I can give you a very small program that computes pi to 1,000 or 10,000 decimal places.

You can start by formatting the code properly on the forum. Put the code between [ code ] … [ / code ].