segmentation falt for the CUDA program cuda, segmentation falt

I wrote a short CUDA program, and when I compile it, it’s good. But when I run it, it appears “segmentation falt” and I don’t know why

Please help, thanks : )

it’is the main program

#include <stdio.h>

#include <stdlib.h>

#include <cutil_inline.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <thread_kernel.cu>

int main()

{

int n=8;

  int h_pixel[]={23,21,34,2,2,3,4,45};

  int h_poids[]={20,23,56,8,7,4,9,77};

  int *h_sum, *h_asum;

int *pixel;

  int *poids;

  int *sum;

  int *asum;

dim3 grid(1,1,1);

  dim3 block(n,1,1);

cudaMalloc((void**) &pixel,sizeof(int)*n);

  cudaMalloc((void**) &poids,sizeof(int)*n);

  cudaMalloc((void**) &sum,sizeof(int));

  cudaMalloc((void**) &asum,sizeof(int));

cudaMemcpy(pixel,h_pixel,sizeof(int)*n,cudaMemcpyHostToDevice);

  cudaMemcpy(poids,h_poids,sizeof(int)*n,cudaMemcpyHostToDevice);

thread_kernel<<<grid,block>>>(pixel,poids,sum,asum);

cudaMemcpy(h_sum,sum,sizeof(int),cudaMemcpyDeviceToHost);

  cudaMemcpy(h_asum,asum,sizeof(int),cudaMemcpyDeviceToHost);

printf("sum=%d\nasum=%d",*h_sum,*h_asum);

cudaFree(pixel);

  cudaFree(poids);

  cudaFree(sum);

  cudaFree(asum);

}

it’s the kernel

#include <stdio.h>

#include <stdlib.h>

#include <cutil_inline.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <math.h>

__global__ void thread_kernel(int *pixel,int *poids,int *sum,int *asum)

{

  int i=blockIdx.x*blockDim.x+threadIdx.x;

*sum=0;

  *asum=0;

*sum=(*sum)+pixel[i]*poids[i];

  *asum=(*asum)+poids[i];

}

Thanks a lot!!!

you don’t allocate storage for *h_sum, *h_asum in host.

modify code to

int h_sum, h_asum ;

cudaMemcpy( &h_sum, sum,sizeof(int),cudaMemcpyDeviceToHost);

cudaMemcpy( &h_asum, asum,sizeof(int),cudaMemcpyDeviceToHost);

printf(“sum=%d\nasum=%d”,h_sum,h_asum);

result is

sum=3465

asum=77