I am new to CUDA. I have wrote a simple program to add two vectors of specified length. The program works fine until i exceed a specific limit of elements to be added.
When the NumOfElements is 83888 it works fine,but when i increase this value ten times i.e. 838880, segmentation fault occurs.
i have enough memory to hold these values as i am running it on GFORCE 930MX with ~2GB on global memory and ~8GB of host memory. These 3 vectors of specified length will take no longer then 838880 elements x 3 Arrays x 4 bytes each = 8MB
I debug the code and found that when the IP is at printf(“here0\n”);, segmentation fault occurs.
Here is the sample code which i am running.
/*
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <stdio.h>
#include <numeric>
#include <stdlib.h>
#include <stdint.h>
#define NumOfElements <b>83888</b> // 838880 (segmentation fault)
#define NumOfThreadsPerBlock 128
__global__ void add(int32_t *a,int32_t *b,int32_t *res_dev){
int32_t tid = threadIdx.x + (blockIdx.x * blockDim.x);
while(tid < NumOfElements){
res_dev[tid] = a[tid] + b[tid];
tid += blockDim.x * gridDim.x;
}
}
int main(void){
printf("here0\n");
int32_t a[NumOfElements],b[NumOfElements],res_host[NumOfElements];
int32_t i;
for(i=0;i<NumOfElements-1;i++){
a[i] = i;
b[i] = i+1;
}
int32_t NumOfBlocks = (NumOfElements+(NumOfThreadsPerBlock-1))/NumOfThreadsPerBlock;
int32_t *a_dev,*b_dev,*res_dev;
// Allocate memory on device
cudaMalloc((void**)&a_dev,sizeof(int32_t)*NumOfElements);
cudaMalloc((void**)&b_dev,sizeof(int32_t)*NumOfElements);
cudaMalloc((void**)&res_dev,sizeof(int32_t)*NumOfElements);
// Copy vectors from host to device
cudaMemcpy((void*)a_dev,(void*)&a,sizeof(int32_t)*NumOfElements,cudaMemcpyHostToDevice);
cudaMemcpy((void*)b_dev,(void*)&b,sizeof(int32_t)*NumOfElements,cudaMemcpyHostToDevice);
// Launch kernel
printf("here1\n");
add<<<NumOfBlocks,NumOfThreadsPerBlock>>>(a_dev,b_dev,res_dev);
printf("here2\n");
cudaMemcpy((void*)&res_host,(void*)res_dev,sizeof(int32_t)*NumOfElements,cudaMemcpyDeviceToHost);
return 0;
}
Would like to know why the segmentation fault occurs.
THANKS