Hello everyone,
I am new to GPU programming, and I have not managed to sum two arrays of elements.
Here is my code and thank you for your time and your help.
main.cpp
// Element-wise sum of two host arrays, computed on the GPU: reads `size` ints
// from m1 and m2 and stores m1[i] + m2[i] into m3[i]. Defined in add.cu.
// NOTE(review): main() below uses std::cout and calloc, so this file needs
// <iostream> and <cstdlib> included above this line -- confirm they are present.
void addWrapper(int* m1, int* m2, int* m3, size_t size);
// Host driver: builds two n-element arrays filled with 1s and 2s, sums them
// through addWrapper, and prints the n results separated by spaces.
// Returns 0 on success, 1 if a host allocation fails.
int main() {
    // Number of elements in each array.
    const size_t n = 10;

    // Allocating zero-initialised memory on the host for inputs and result.
    int* m1 = (int*)calloc(n, sizeof(int));
    int* m2 = (int*)calloc(n, sizeof(int));
    int* m3 = (int*)calloc(n, sizeof(int));
    if (!m1 || !m2 || !m3) {
        std::cerr << "Host allocation failed\n";
        // free() accepts null pointers, so partial allocations are released safely.
        free(m1);
        free(m2);
        free(m3);
        return 1;
    }

    // Setting each element to 1 and 2.
    for (size_t i = 0; i < n; i++) {
        m1[i] = 1;
        m2[i] = 2;
    }

    // Calling the GPU wrapper; m3 receives the element-wise sums.
    addWrapper(m1, m2, m3, n);

    // Printing result. The bound is n (not a literal 10) so the loop stays
    // correct if the array size above is changed.
    for (size_t i = 0; i < n; i++)
        std::cout << m3[i] << " ";

    // Releasing host memory.
    free(m1);
    free(m2);
    free(m3);
    return 0;
}
add.cu
#include <cstdio>

#include <cuda.h>
#include <cuda_runtime.h>
// Element-wise addition kernel: d_m3[i] = d_m1[i] + d_m2[i] for i in [0, size).
// All three pointers must refer to device memory holding at least `size` ints.
// Each thread starts at its flat global index and advances by the total number
// of launched threads, so any 1D grid/block configuration (including <<<1, 1>>>)
// covers every element exactly once. Uses no shared memory.
__global__ void addKernel(int* d_m1, int* d_m2, int* d_m3, size_t size) {
    // Widen to size_t before multiplying so large grids cannot overflow 32 bits.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < size; i += stride) {
        d_m3[i] = d_m1[i] + d_m2[i];
    }
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host wrapper: computes m3[i] = m1[i] + m2[i] on the GPU.
//   m1, m2 : host input arrays of `size` ints
//   m3     : host output array of `size` ints
//   size   : number of elements; 0 is a no-op
// On any CUDA failure a message is written to stderr, device memory is
// released, and m3 is left as the caller passed it.
__host__ void addWrapper(int* m1, int* m2, int* m3, size_t size) {
    // Nothing to compute; also avoids launching a kernel with zero blocks.
    if (size == 0) {
        return;
    }

    const size_t bytes = size * sizeof(int);

    // Device pointers start as NULL so the cleanup below is safe on every path.
    int* d_m1 = NULL;
    int* d_m2 = NULL;
    int* d_m3 = NULL;

    // Launch configuration: ceil(size / threadsPerBlock) blocks, capped; the
    // kernel's strided loop picks up any elements beyond blocks * threads.
    const unsigned int threadsPerBlock = 256;
    const size_t maxBlocks = 65535;
    const size_t wantedBlocks =
        size / threadsPerBlock + (size % threadsPerBlock != 0 ? 1 : 0);
    const unsigned int blocks =
        static_cast<unsigned int>(wantedBlocks < maxBlocks ? wantedBlocks : maxBlocks);

    do {
        // Allocating in Device Memory.
        if (!cudaOk(cudaMalloc(&d_m1, bytes), "cudaMalloc(d_m1)")) break;
        if (!cudaOk(cudaMalloc(&d_m2, bytes), "cudaMalloc(d_m2)")) break;
        if (!cudaOk(cudaMalloc(&d_m3, bytes), "cudaMalloc(d_m3)")) break;

        // Copying the inputs to Device Memory. d_m3 is not uploaded: the
        // kernel overwrites every one of its elements.
        if (!cudaOk(cudaMemcpy(d_m1, m1, bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(m1 -> device)")) break;
        if (!cudaOk(cudaMemcpy(d_m2, m2, bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(m2 -> device)")) break;

        // Calling the kernel function. A launch returns no status itself, so
        // configuration errors are picked up with cudaGetLastError().
        addKernel<<<blocks, threadsPerBlock>>>(d_m1, d_m2, d_m3, size);
        if (!cudaOk(cudaGetLastError(), "addKernel launch")) break;

        // Copying Device Memory to Host Memory. This blocking copy waits for
        // the kernel, so errors raised while it ran are reported here.
        if (!cudaOk(cudaMemcpy(m3, d_m3, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> m3)")) break;
    } while (false);

    // Freeing Device Memory (cudaFree on a NULL pointer is a no-op).
    cudaFree(d_m1);
    cudaFree(d_m2);
    cudaFree(d_m3);
}