something wrong with my share memory

Huang_Wei_Hao · November 13, 2008, 3:55am

VC2005 compiler shows the error message ~

=================
1>Edge_Detection.cu
1>c:\program files\nvidia corporation\nvidia cuda sdk\projects\00_edge_detection_66_2\Edge_Detection_kernel.cu(109): error: expression must have (pointer-to-) function type
1>c:\program files\nvidia corporation\nvidia cuda sdk\projects\00_edge_detection_66_2\Edge_Detection_kernel.cu(116): error: expression must have (pointer-to-) function type
1>c:\program files\nvidia corporation\nvidia cuda sdk\projects\00_edge_detection_66_2\Edge_Detection_kernel.cu(116): error: expression must have (pointer-to-) function type
1>c:\program files\nvidia corporation\nvidia cuda sdk\projects\00_edge_detection_66_2\Edge_Detection_kernel.cu(126): error: expression must have (pointer-to-) function type
1>c:\program files\nvidia corporation\nvidia cuda sdk\projects\00_edge_detection_66_2\Edge_Detection_kernel.cu(126): error: expression must have (pointer-to-) function type
1>5 errors detected in the compilation of “C:\DOCUME~1\Eric\LOCALS~1\Temp/tmpxft_00000934_00000000-6_Edge_Detection.cpp1.ii”.

===============

I have no idea about this problem …
It seems to be illegal that I copy the data from global memory to shared memory (in Edge_Detection_kernel.cu)…
But … compiler didn’t pass this …
Does anyone know how to solve this ~ ?
I write the note to explain where the error is …

============= Edge_Detection.cu ======================

#include “Edge_Detection_kernel.cu”
#include <cutil.h>
#include “Edge.h”

//*** Image Data
unsigned char *Dev_img_data = NULL ;
float *Dev_Result_data = NULL ;
int size_img_data ;
int size_result_img_data ;

void Copy_IMG_to_cu(unsigned char *img_data , float *Result_img , int width , int height)
{
unsigned int total_timer ; float total_elapsedTimeInMs ;
CUT_SAFE_CALL( cutCreateTimer( &total_timer ) );
CUT_SAFE_CALL( cutStartTimer( total_timer));

//Create Image Memory (global memory)
size_img_data = widthheight3sizeof(unsigned char) ;
size_result_img_data = widthheight*sizeof(float) ;
cudaMalloc((void**) &Dev_img_data , size_img_data );
cudaMalloc((void**) &Dev_Result_data , size_result_img_data );

//Reset
cudaMemset( Dev_img_data , 0 , size_img_data ) ;
cudaMemset( Dev_Result_data , 0 , size_result_img_data ) ;

//COPY
cudaMemcpy( Dev_img_data , img_data , size_img_data , cudaMemcpyHostToDevice) ;
cudaBindTexture( 0 , tex_img_data , Dev_img_data) ;

// Run Kernel Edge Detection //
int Calculation = width * height ;
int Basic_Block = Calculation/MAX_Threads + (0!=(Calculation%MAX_Threads)) ;
int Dev_Block = Basic_Block ;

Kernel_Share_Edge_Detect<<< Dev_Block , MAX_Threads >>>( width , height , Dev_Result_data , Basic_Block) ;

//COPY EDGE IMAGE to CPU
cudaMemcpy(Result_img , Dev_Result_data , size_result_img_data , cudaMemcpyDeviceToHost) ;

CUT_SAFE_CALL( cutStopTimer( total_timer));
total_elapsedTimeInMs = cutGetTimerValue( total_timer);
printf(" Edge Detection costs total time <<<%f ms>>> \n",total_elapsedTimeInMs);
CUT_SAFE_CALL( cutDeleteTimer( total_timer));

}

====================================================

============= Edge_Detection_kernel.cu ======================
#include “Edge.h”
texture< unsigned char , 1 , cudaReadModeElementType> tex_img_data ;

global void
Kernel_Share_Edge_Detect(int width , int height , float *Dev_Result_data , int Basic_Block)
{
int tx = threadIdx.x ;
int bx = blockIdx.x ;

 //Create Share Memory
__shared__ float S_Img_data[Height][Width] ;

int img_x = (bxMAX_Threads+tx) % width ;
int img_y = (bxMAX_Threads+tx) / width ;

S_Img_data(img_y , img_x) = (float)tex1Dfetch(tex_img_data , img_ywidth3+img_x*3+2 ); // <–error

__syncthreads() ;

//========== Horizontal ========//
float a = S_Img_data(img_y , img_x+1) - S_Img_data(img_y , img_x); // <–error
if( img_x <= (width-1) )
if(a>20 || a<-20)
{
Dev_Result_data[img_y*width + img_x] = 255 ;
}

__syncthreads() ;

//========== Vertical ========//
a = S_Img_data(img_y+1 , img_x) - S_Img_data(img_y , img_x); // <–error
if( img_y <= (height-1) )
if(a>20 || a<-20)
{
Dev_Result_data[img_y*width + img_x] = 255 ;
}

__syncthreads() ;

}

tmurray · November 13, 2008, 6:59am

Uh, why do you have parens and not brackets?

Huang_Wei_Hao · November 13, 2008, 10:17am

Excuse me~~

what is parens???

which brackets I missed ???

THANKS~~

Ailleur · November 13, 2008, 1:44pm

S_Img_data(img_y , img_x)

S_Img_data[img_y,img_x]

_Big_Mac · November 14, 2008, 2:59pm

Is this correct?

Kernel_Share_Edge_Detect(int width , int height , float *Dev_Result_data , int Basic_Block)

{

(…)

//Create Share Memory

shared float S_Img_data[Height][Width] ;

C is case sensitive.

And out of curiosity, how does this work and is this an intended hack?

int Basic_Block = Calculation/MAX_Threads + (0!=(Calculation%MAX_Threads))

You’re basically adding a boolean value to an int. AFAIK this translates to:

int Basic_Block = Calculation/MAX_Threads;

if((Calculation%MAX_Threads)!=0)

	Basic_Block+=1;