Need some help with my code to add tow arrays and print them in couda (c[i] = a[i] + b[i])

Hello every one :rolleyes:

I’m beginner :unsure: in this field I wanted help with my mess code

I need to write a program to practice CUDA programming on ther GPU server .So, In my program I want to make two arrays (each has 256 elements)and in my main function, I want to randomly assign a number (<= 4) to each element in these two arrays and I want to use GPU to add up these two arrays (the the same number of threads 256 threads) like this (c[i] = a[i] + b[i]) for the third array. finally , the main function print out the third array.

I need to make this code work :angry:

//////////////////////////////////////////////////////////////////////////////

#include <stdio.h>

#define SIZE 256

#define BLOCKSIZE 1

__global__ void sumArrays(int *a, int *b, int *c)

/* 

1-Thread scans subset of array elements

2-Call device function to compare with “6”

3-Compute local result

*/

{

				int idx = blockIdx.x * blockDim.x + threadIdx.x;

				c[idx] = a[idx] + b[idx]

}

__device__ int compare(int a, int b) 

{

  if (a == b) return 1;

  return 0;

}

//1-Allocate memory on device for copy of input and output

/*2-Copy input to device

  3-Set up grid/block

  4-Call global function

  6-Copy device output to host*/

/*Allocate memory on device for copy of input and output*/

__host__ void outer_compute	

{

(int *in_arr, int *out_arr);

int *a_in_array, *b_out_array;

cudaMalloc((void **) &d_in_array, 	SIZE*sizeof(int));

cudaMalloc((void **) &d_out_array, 	BLOCKSIZE*sizeof(int));

}

//copy input to device

cudaMemcpy(d_in_array, h_in_array, SIZE*sizeof(int),cudaMemcpyHostToDevice);

//set up grid/block

msize = (SIZE+BLOCKSIZE) * 	sizeof (int);

   compute<<<1,BLOCKSIZE,msize)>>> 	(d_in_array, d_out_array);

//call global function

//copy device output to host

cudaMemcpy(h_out_array,d_out_array, 	BLOCKSIZE*sizeof(int), 	cudaMemcpyDeviceToHost);

//call global function

}

//call host function

__host __ void outer_compute(int *a, int *b, int *c)

int main()

{

//Initialization

/*1-Allocate memory on host for input and output

2-Assign random numbers to input array

3-Call host function

4-Calculate final output from per-thread output

5-Print result*/

int main(void)

{

int *a, *b,*c;

int sum=0;

  /* initialization */ 

  outer_compute(int a*, int b*,int c*);

for (int i=0; i<BLOCKSIZE; i++) 

{

	sum+=out_array[i];

}

printf ("Result = %d\n",sum);

  scan()

}

Hello every one :rolleyes:

I’m beginner :unsure: in this field I wanted help with my mess code

I need to write a program to practice CUDA programming on ther GPU server .So, In my program I want to make two arrays (each has 256 elements)and in my main function, I want to randomly assign a number (<= 4) to each element in these two arrays and I want to use GPU to add up these two arrays (the the same number of threads 256 threads) like this (c[i] = a[i] + b[i]) for the third array. finally , the main function print out the third array.

I need to make this code work :angry:

//////////////////////////////////////////////////////////////////////////////

#include <stdio.h>

#define SIZE 256

#define BLOCKSIZE 1

__global__ void sumArrays(int *a, int *b, int *c)

/* 

1-Thread scans subset of array elements

2-Call device function to compare with “6”

3-Compute local result

*/

{

				int idx = blockIdx.x * blockDim.x + threadIdx.x;

				c[idx] = a[idx] + b[idx]

}

__device__ int compare(int a, int b) 

{

  if (a == b) return 1;

  return 0;

}

//1-Allocate memory on device for copy of input and output

/*2-Copy input to device

  3-Set up grid/block

  4-Call global function

  6-Copy device output to host*/

/*Allocate memory on device for copy of input and output*/

__host__ void outer_compute	

{

(int *in_arr, int *out_arr);

int *a_in_array, *b_out_array;

cudaMalloc((void **) &d_in_array, 	SIZE*sizeof(int));

cudaMalloc((void **) &d_out_array, 	BLOCKSIZE*sizeof(int));

}

//copy input to device

cudaMemcpy(d_in_array, h_in_array, SIZE*sizeof(int),cudaMemcpyHostToDevice);

//set up grid/block

msize = (SIZE+BLOCKSIZE) * 	sizeof (int);

   compute<<<1,BLOCKSIZE,msize)>>> 	(d_in_array, d_out_array);

//call global function

//copy device output to host

cudaMemcpy(h_out_array,d_out_array, 	BLOCKSIZE*sizeof(int), 	cudaMemcpyDeviceToHost);

//call global function

}

//call host function

__host __ void outer_compute(int *a, int *b, int *c)

int main()

{

//Initialization

/*1-Allocate memory on host for input and output

2-Assign random numbers to input array

3-Call host function

4-Calculate final output from per-thread output

5-Print result*/

int main(void)

{

int *a, *b,*c;

int sum=0;

  /* initialization */ 

  outer_compute(int a*, int b*,int c*);

for (int i=0; i<BLOCKSIZE; i++) 

{

	sum+=out_array[i];

}

printf ("Result = %d\n",sum);

  scan()

}