hallo ive tried and created my own code for dotVectors in cuda lang and
after all always i get “0” as a result and this is my code
thanks in advance for helping me out
TAB__A[ 1 , 4 , 9 , 8 , 2 , 5 , 1 , 1 , 5 , 7 ,]
TAB__B[ 7 , 0 , 4 , 8 , 4 , 5 , 7 , 1 , 2 , 6 ,]
We Calculate : 1 * 7 + 4 * 0 + 9 * 4 + 8 * 8 + 2 * 4 + 5 * 5 + 1 * 7 + 1 * 1 +
5 * 2 + 7 * 6
THE SUM IS : 0
#include <stdio.h>
#include <conio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <windows.h>
#define SIZE 10
#define min(a,b) (a<b?a:b)
const int threadperBlock = 256;
const int blockperGrid = min(32 , (SIZE+threadperBlock-1)/threadperBlock);
__global__ void DotVector(int *a , int *b , int *c){
int tid = threadIdx.x + blockIdx.x * blockDim.x;
__shared__ int cache[threadperBlock];
int outline = blockIdx.x * gridDim.x;
int cacheIdx = threadIdx.x;
int temp = 0;
while(tid<SIZE){
temp += a[tid] * b[tid];
tid += outline ;
}
cache[cacheIdx] = temp;
__syncthreads();
int i = blockDim.x/2;
while(i!=0){
if(cacheIdx<i){
cache[cacheIdx] += cache[cacheIdx + 1 ];
__syncthreads();
i/=2;
}
if(cacheIdx == 0 ){
c[blockIdx.x] = cache[0];
}
}
}
int main(){
int Sum = 0 ;
int *a , *b, *c ;
int *d_a, *d_b, *d_c ;
a=(int *)malloc(SIZE*sizeof(int));
b=(int *)malloc(SIZE*sizeof(int));
c=(int *)malloc(blockperGrid*sizeof(int));
cudaMalloc(&d_a,SIZE*sizeof(int));
cudaMalloc(&d_b,SIZE*sizeof(int));
cudaMalloc(&d_c,blockperGrid*sizeof(int));
for (int i = 0; i < SIZE; i++) {
a[i] = rand() %10;
b[i] = rand() %10;
c[i] =0;
}
cudaMemcpy(d_a ,a , SIZE*sizeof(int) ,cudaMemcpyHostToDevice);
cudaMemcpy(d_b ,b , SIZE*sizeof(int) ,cudaMemcpyHostToDevice);
cudaMemcpy(d_c ,c , blockperGrid*sizeof(int) ,cudaMemcpyHostToDevice);
DotVector<<<blockperGrid,threadperBlock>>>(d_a,d_b,d_c);
cudaMemcpy(c,d_c,blockperGrid*sizeof(int),cudaMemcpyDeviceToHost);
for(int i = 0 ; i<blockperGrid; i++){
printf(" %d \n" , c[i]);
Sum += c[i];
}
printf("TAB__A[ ");
for(int i = 0 ; i<SIZE; i++){
printf(" %d ," , a[i]);
}
printf("] \n" );
printf("TAB__B[ ");
for(int i = 0 ; i<SIZE; i++){
printf(" %d ," , b[i]);
}
printf("] \n" );
printf("We Calculate : \t" );
for(int i = 0 ; i<SIZE; i++){
if(i == SIZE-1){
printf("%d * %d " ,a[i], b[i]);
}else{
printf("%d * %d + " ,a[i], b[i]);
}
}
printf("\n" );
printf("THE SUM IS : %d \n",Sum);
free(a);
free(b);
free(c);
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
}