Hey guys!! I'm new to CUDA and am having a problem with my code. The code runs fine for relatively small array sizes, but if the array size is set to something like 100000, I get an error saying that the device driver has stopped responding. I have an 8800GT. Any help would be highly appreciated. The code is as follows.
#include <conio.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <synchronizationerrors.h>
/*
 * autocorr - unnormalised autocorrelation of an integer sequence.
 *
 * For every lag k in [0, n) computes
 *     dev_array2[k] = sum over j in [0, n-k) of dev_array1[j] * dev_array1[j+k]
 *
 * Parameters:
 *   n          - number of elements in both arrays; nothing is written if n <= 0.
 *   dev_array1 - device pointer to the n input samples (only read).
 *   dev_array2 - device pointer to n output slots, one per lag (only written).
 *
 * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
 * index and advances by the total number of launched threads, so every lag is
 * covered whether or not the grid is large enough to give each lag its own
 * thread.
 *
 * NOTE: the products are accumulated in a plain int, so the result can
 * overflow when n and/or the sample values are large.
 */
__global__ void autocorr(int n,int *dev_array1,int *dev_array2){
    // Total number of threads in the launch = distance between two lags
    // handled by the same thread.
    const int stride = blockDim.x * gridDim.x;

    for(int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < n; tid += stride){
        // The accumulator must restart for every lag this thread processes.
        int sum = 0;
        for(int j = 0; j < n - tid; j++){
            sum += dev_array1[j] * dev_array1[j + tid];
        }
        dev_array2[tid] = sum;
    }
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when err is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t err,const char *what){
    if(err != cudaSuccess){
        fprintf(stderr,"CUDA error during %s: %s\n",what,cudaGetErrorString(err));
        return 0;
    }
    return 1;
}

/*
 * Reads the element count from the console, fills the input with 0..n-1,
 * runs the autocorr kernel on the GPU and prints one result per lag.
 *
 * Returns 0 on success and 1 on invalid input, allocation failure or any
 * CUDA error. Every CUDA call is checked, so a kernel that fails to launch or
 * is aborted while running is reported instead of printing stale memory.
 */
int main(){
    // A multiple of the 32-thread warp size, so no lanes of the last warp of
    // a block are wasted.
    const int threadsPerBlock = 256;
    int n = 0;
    int exitCode = 1;
    int blocks = 0;
    size_t bytes = 0;
    int *array1 = NULL;      //host input array.
    int *array2 = NULL;      //host output array.
    int *dev_array1 = NULL;  //device input array.
    int *dev_array2 = NULL;  //device output array.

    printf("Enter no. of elements: ");
    // Reject unparsable, non-positive, or too-large counts: n == 0 would
    // request a grid of zero blocks, and n * sizeof(int) must fit in size_t.
    if(scanf_s("%d",&n) != 1 || n <= 0 || (size_t)n > ((size_t)-1) / sizeof(int)){
        fprintf(stderr,"\nThe number of elements must be a positive integer.\n");
        goto cleanup;
    }
    printf("\n\n");

    bytes = (size_t)n * sizeof(int);
    array1 = (int *)malloc(bytes);
    array2 = (int *)malloc(bytes);
    if(array1 == NULL || array2 == NULL){
        fprintf(stderr,"Out of host memory for %d elements.\n",n);
        goto cleanup;
    }
    if(!cudaOk(cudaMalloc((void**)&dev_array1,bytes),"cudaMalloc(dev_array1)")) goto cleanup;
    if(!cudaOk(cudaMalloc((void**)&dev_array2,bytes),"cudaMalloc(dev_array2)")) goto cleanup;

    for(int i = 0; i < n; i++){
        array1[i] = i;
    }

    //copying elements of input array to device array.
    if(!cudaOk(cudaMemcpy(dev_array1,array1,bytes,cudaMemcpyHostToDevice),
               "copy of input to device")) goto cleanup;

    // Ceiling division written so that it cannot overflow for large n.
    blocks = (n - 1) / threadsPerBlock + 1;
    autocorr<<<blocks,threadsPerBlock>>>(n,dev_array1,dev_array2);

    // Launch-configuration errors are only visible through cudaGetLastError().
    if(!cudaOk(cudaGetLastError(),"autocorr launch")) goto cleanup;

    // This blocking copy waits for the kernel, so it also returns any error
    // raised while the kernel was running (for example a kernel that was
    // aborted because it ran for too long).
    if(!cudaOk(cudaMemcpy(array2,dev_array2,bytes,cudaMemcpyDeviceToHost),
               "autocorr execution / copy of results to host")) goto cleanup;

    printf("\nAutocorrelated array\n\n");
    for(int i = 0; i < n; i++){
        printf("\n%d",array2[i]);
    }
    exitCode = 0;

cleanup:
    // All pointers start as NULL, so releasing them is safe on every path.
    cudaFree(dev_array1);
    cudaFree(dev_array2);
    free(array1);
    free(array2);
    _getch();  // keep the console window open until a key is pressed.
    return exitCode;
}