error:__eh_curr_region cuda

Hi, I have this error when I call the device function in global function, I only use the shared array in this device function, and the size is constant.

It’s very urgent…

Seriously, how do you possibly imagine someone can help you based on the information you have provided?

No code, no detailed description of the error, the operating system and CUDA version you are using. Nothing.

There are some very knowledgeable people who frequent these forums, but they can’t read minds…

Here is the code

#include <stdlib.h>

#include <cutil_inline.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <math.h>

__device__ void Product(int *pixel,int *poids,int *product)

{

  int i=blockIdx.x*blockDim.x+threadIdx.x;

  int n=16;

if(i<n)

  {

	product[i]=pixel[i]*poids[i];

  }

}

__device__ void sum_1(int *product,int n,int *sum1)

{

  __shared__ int s_data[512];

  int i=blockIdx.x*blockDim.x+threadIdx.x;

  int tid=threadIdx.x;

s_data[tid]=(i<n) ? (product[i]):0;

  __syncthreads();

for(int dist=blockDim.x/2;dist>0;dist/=2)

  {

	if(tid<dist)

	{

	  s_data[tid]+=s_data[tid+dist];

	}

	__syncthreads();

}

if(tid==0)

  {

	sum1[blockIdx.x]=s_data[0];

  }

}

__device__ int sum_2(int *sum1,int num_blocks)

{

  __shared__ int s_data[512];

  int i=threadIdx.x;

  int tid=threadIdx.x;

  int *sum2=s_data;

// reading from global memory, writing to shared memory

  s_data[tid]=(i<num_blocks) ? (sum1[i]):0;

  __syncthreads();

// do reduction in shared memory

  for(int dist=num_blocks/2;dist>0;dist/=2)

  {

	if(tid<dist)

	{

	  s_data[tid]+=s_data[tid+dist];

	}

	__syncthreads();

}

if(tid==0)   *sum2=s_data[0];

  return *sum2;

}

__global__ void sum(int *pixel,int *poids,int *product,int *sum,int n)

{

  int sum1[n*sizeof(int)];

  int num_block=n/4+((n%4==0)?0:1);

Product(pixel, poids, product);

  sum_1(product, n, sum1);

  *sum=sum_2(sum1,num_block);

}				

int main()

{

int n=16;

  int i;

  int h_pixel[]={23,21,34,2,2,3,4,45,55,69,5,3,5,8,7,9};

  int h_poids[]={20,23,56,8,7,4,9,77,23,4,6,4,2,3,1,4};

  int *sum;

  int *h_sum;

int *product;

  int *h_product;

int num_blocks=n/4+((n%4==0)? 0:1);

  int block_size=4;

cudaMalloc((void**) &pixel,sizeof(int)*n);

  cudaMalloc((void**) &poids,sizeof(int)*n);

  cudaMalloc((void**) &sum,sizeof(int)*num_blocks);

  cudaMalloc((void**) &product,sizeof(int)*n);

h_sum=(int *)malloc(sizeof(int));

  h_product=(int *)malloc(sizeof(int)*n);

cudaMemcpy(pixel,h_pixel,sizeof(int)*n,cudaMemcpyHostToDevice);

  cudaMemcpy(poids,h_poids,sizeof(int)*n,cudaMemcpyHostToDevice);

kernel<<<num_blocks,block_size>>>(pixel,poids,product,sum,n);

cudaMemcpy(h_product,product,sizeof(int)*n,cudaMemcpyDeviceToHost);

  cudaMemcpy(h_sum,sum,sizeof(int),cudaMemcpyDeviceToHost);

for(i=0;i<n;i++)  printf("product=%d\n",h_product[i]);

  printf("sum=%d\n",h_sum[0]);

cudaFree(pixel);

  cudaFree(poids);

  cudaFree(product);

  cudaFree(sum);

  free(h_product);

  free(h_sum);

in line “*sum=sum_2(sum1,num_block)”, it appears the error “identifier “__eh_curr_region” is undefined”.

You can’t declare shared memory in a device function.

I think it’s not a problem. I saw in SDK sample that they also declare shared memory in __device__function and it works well. you can see for example the sample “dxtc”

The nvcc error you are seeing means that there is some invalid or unresolvable static memory allocation detected (normally an array with a non integral value for the size), and it is happening during inline function expansion.

I don’t understand you very well, the size of the array declared as shared in device function is an integer. Can you explain it more specific?

The error is here:

__global__ void sum(int *pixel,int *poids,int *product,int *sum,int n)

{

  int sum1[n*sizeof(int)];

  int num_block=n/4+((n%4==0)?0:1);

Product(pixel, poids, product);

  sum_1(product, n, sum1);

  *sum=sum_2(sum1,num_block);

}

You cannot declare sum1 in that way. Runtime dimensioned arrays are illegal in C90.