Improve my first program (computing pi)

Hi, I'm using the Wallis product to compute pi.
This is my code:

#include <stdio.h>
#include <stdlib.h>

// For the CUDA runtime routines (prefixed with "cuda")
#include <cuda_runtime.h>

/**
 * Returns the i-th factor of the Wallis product, 4*i^2 / (4*i^2 - 1).
 *
 * The arithmetic is done in float so that 4*i*i cannot overflow a 32-bit
 * int when i is large. For i == 0 the result is -0.0f (0 / -1), which is
 * not a Wallis factor.
 */
__host__ __device__ static inline float wallisTerm(int i)
{
    const float fourISq = 4.0f * (float)i * (float)i;
    return fourISq / (fourISq - 1.0f);
}

/**
 * CUDA kernel: stores the i-th Wallis-product factor in a[i] for every
 * index 0 <= i < num.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. Threads
 * whose global index is >= num do nothing, so the grid may be larger than
 * num. No shared memory is used.
 *
 * @param a    device pointer to an array of at least num floats (output)
 * @param num  number of elements to write
 */
__global__ void pi(float *a, int num)
{
    // Flat global thread index.
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail: the grid rarely divides num evenly.
    if (i < num)
    {
        a[i] = wallisTerm(i);
    }
}

/**
 * Host driver: estimates pi with the Wallis product
 *     pi / 2 = prod_{i >= 1} 4*i^2 / (4*i^2 - 1).
 *
 * The factors are computed on the GPU by the `pi` kernel, copied back to
 * the host, printed, multiplied together on the host, and the estimate
 * 2 * product is printed.
 *
 * @return 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Elements 1 .. num-1 enter the product; element 0 is written by the
    // kernel but is not a Wallis factor and is skipped below.
    const int num = 50;
    const size_t size = num * sizeof(float);

    // Allocate the host result vector
    float *h_a = (float *)malloc(size);
    if (h_a == NULL)
    {
        fprintf(stderr, "Failed to allocate host vector a!\n");
        exit(EXIT_FAILURE);
    }

    // Allocate the device result vector
    float *d_a = NULL;
    err = cudaMalloc((void **)&d_a, size);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // No host-to-device copy is needed: the kernel overwrites every element
    // of d_a, so any initial contents would be discarded.

    // Fixed block size (multiple of the warp size) with a ceil-div grid, so
    // the launch stays valid if num grows beyond the per-block thread limit.
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (num + threadsPerBlock - 1) / threadsPerBlock;

    pi<<<blocksPerGrid, threadsPerBlock>>>(d_a, num);

    // A kernel launch returns no status; launch-configuration errors are
    // reported through cudaGetLastError().
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to launch pi kernel (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Copy the device result vector in device memory to the host result vector
    // in host memory. This blocking copy also waits for the kernel to finish
    // and reports any error raised while it ran.
    printf("Copy output data from the CUDA device to the host memory\n");
    err = cudaMemcpy(h_a, d_a, size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy vector a from device to host (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Free device memory
    err = cudaFree(d_a);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to free device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Reset the device and exit
    err = cudaDeviceReset();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to deinitialize the device! error=%s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Print the individual factors (index 0 is skipped, see above)
    for (int i = 1; i < num; i++)
    {
        printf("%f \n", h_a[i]);
    }

    // Multiply the factors: the product converges to pi / 2
    float f = 1.0f;
    for (int i = 1; i < num; i++)
    {
        f = h_a[i] * f;
    }

    printf("pi est égal %f", 2 * f);

    // Free host memory
    free(h_a);
    return 0;
}
Please help me to improve my program
Thank you

How do you compute pi? `a[i] = (float)(4*i*i) / (4*i*i - 1);` ???

I can give you a very small program that computes pi to 1000 or 10000 decimal places.

You can start by formatting the code properly on the forum. Put the code between [ code ] … [ / code ].