I wrote a short CUDA program, and when I compile it, it’s good. But when I run it, it appears “segmentation falt” and I don’t know why
Please help, thanks : )
it’is the main program
#include <stdio.h>
#include <stdlib.h>
#include <cutil_inline.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <thread_kernel.cu>
int main()
{
int n=8;
int h_pixel[]={23,21,34,2,2,3,4,45};
int h_poids[]={20,23,56,8,7,4,9,77};
int *h_sum, *h_asum;
int *pixel;
int *poids;
int *sum;
int *asum;
dim3 grid(1,1,1);
dim3 block(n,1,1);
cudaMalloc((void**) &pixel,sizeof(int)*n);
cudaMalloc((void**) &poids,sizeof(int)*n);
cudaMalloc((void**) &sum,sizeof(int));
cudaMalloc((void**) &asum,sizeof(int));
cudaMemcpy(pixel,h_pixel,sizeof(int)*n,cudaMemcpyHostToDevice);
cudaMemcpy(poids,h_poids,sizeof(int)*n,cudaMemcpyHostToDevice);
thread_kernel<<<grid,block>>>(pixel,poids,sum,asum);
cudaMemcpy(h_sum,sum,sizeof(int),cudaMemcpyDeviceToHost);
cudaMemcpy(h_asum,asum,sizeof(int),cudaMemcpyDeviceToHost);
printf("sum=%d\nasum=%d",*h_sum,*h_asum);
cudaFree(pixel);
cudaFree(poids);
cudaFree(sum);
cudaFree(asum);
}
it’s the kernel
#include <stdio.h>
#include <stdlib.h>
#include <cutil_inline.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <math.h>
__global__ void thread_kernel(int *pixel,int *poids,int *sum,int *asum)
{
int i=blockIdx.x*blockDim.x+threadIdx.x;
*sum=0;
*asum=0;
*sum=(*sum)+pixel[i]*poids[i];
*asum=(*asum)+poids[i];
}
Thanks a lot!!!