CUDA errors: identifier (__global__/__device__ function) is undefined, no suitable conversion, ...

I'm doing a project on photon mapping. I wrote the ray-tracer part and it runs successfully on the CPU. Now I'm porting the same code to the GPU (working over SSH). I searched the web and this forum for these kinds of errors but couldn't find a good solution.

I'm getting the following errors:

nvcc -c -lSDL -lGL -lGLU AntTweakBar.a gpuRayTracer.cu

gpuRayTracer.cu(44): error: identifier "raytracer" is undefined

gpuRayTracer.cu(53): error: no suitable conversion function from "Float3" to "void *" exists

gpuRayTracer.cu(55): error: no suitable conversion function from "Float3" to "void *" exists

gpuRayTracer.cu(76): error: identifier "GPUsub" is undefined

gpuRayTracer.cu(77): error: identifier "GPUnormalize" is undefined

gpuRayTracer.cu(78): error: identifier "GPUcross" is undefined

gpuRayTracer.cu(80): error: calling a host function from a __device__/__global__ function is not allowed

gpuRayTracer.cu(90): error: identifier "GPUmul" is undefined

gpuRayTracer.cu(95): error: calling a host function from a __device__/__global__ function is not allowed

gpuRayTracer.cu(95): error: identifier "GPUadd" is undefined

gpuRayTracer.cu(192): error: calling a host function from a __device__/__global__ function is not allowed

15 errors detected in the compilation of "/tmp/tmpxft_0000432c_00000000-4_gpuRayTracer.cpp1.ii".

make: *** [gpuRayTracer.o] Error 2

gpuRayTracer.cu contains the following code.

Lines 44, 53 and 55 (the error sites) are marked in the code below.

Float3, used below, is a structure containing three float members (the x, y, z coordinates).

// Host-side driver for the GPU ray tracer: allocates device buffers, copies
// the scene state (eye, lookAt, object count, screen size, pixels, object
// list) to the device, launches the raytracer kernel, then attempts to copy
// the state back to the host.
//
// NOTE(review): the "line 44:", "line:53" and "line:55" prefixes below are the
// poster's markers for the compiler-reported error sites; they are not part of
// the real source file.
// NOTE(review): no CUDA runtime return code is checked anywhere in this
// function, and none of the device allocations made here is released with
// cudaFree.
void Scene::GPUrayTracer(){

// This is a local host array of OBJ_MAX pointers, not a single device pointer.
Object *d_objectList[OBJ_MAX];

GLubyte 	* d_pixels;

int *d_Width, *d_Height;

Float3 *d_eye,*d_lookAt;

int *d_objectCount;

// Byte sizes of ONE Float3, ONE int and ONE GLubyte respectively.
size_t size1=sizeof(Float3);

size_t size2=sizeof(int);

size_t size3=sizeof(GLubyte);

//size_t size4=sizeof(Object);

// Camera position and look-at point: one Float3 each on the device.
cudaMalloc(&d_eye,size1);

cudaMalloc(&d_lookAt,size1);

cudaMemcpy(d_eye,&this->eye,size1,cudaMemcpyHostToDevice);

cudaMemcpy(d_lookAt,&this->lookAt,size1,cudaMemcpyHostToDevice);

cudaMalloc(&d_objectCount,size2);

cudaMemcpy(d_objectCount,&this->objectCount,size2,cudaMemcpyHostToDevice);

cudaMalloc(&d_Width,size2);

cudaMalloc(&d_Height,size2);

cudaMemcpy(d_Width,&this->screenWidth,size2,cudaMemcpyHostToDevice);

cudaMemcpy(d_Height,&this->screenHeight,size2,cudaMemcpyHostToDevice);

// size3 is sizeof(GLubyte), so this allocates and copies a single element.
// NOTE(review): presumably the whole image buffer was intended -- confirm the
// real buffer size. If pixels is a pointer member, &this->pixels is the
// address of the pointer variable rather than of the pixel data -- confirm
// against the class declaration.
cudaMalloc(&d_pixels,size3);

cudaMemcpy(d_pixels,&this->pixels,size3,cudaMemcpyHostToDevice);

// d_objectList is an array, so it cannot receive the pointer produced by
// cudaMalloc; the (void **) cast only hides the type mismatch, and the device
// address ends up stored in the first bytes of the local array. The following
// cudaMemcpy then uses that host stack array as its "device" destination.
// NOTE(review): the kernel expects a plain Object* -- the element type of
// this->objectList is not visible here; verify before fixing.
cudaMalloc((void **)&d_objectList,

(sizeof(this->objectList)));

cudaMemcpy(d_objectList,

&this->objectList,

sizeof(this->objectList),cudaMemcpyHostToDevice);

// Kernel launch: 1 block of 500 threads.
// - The raytracer kernel is defined after this function in the file, so
//   without an earlier declaration the name is unknown at this point
//   (reported as: identifier "raytracer" is undefined).
// - Six arguments are passed, but the kernel is declared with seven
//   parameters, and it takes screenWidth, screenHeight, eye, lookAt and
//   objectCount by value whereas device pointers are passed here.
line 44:raytracer<<<1,500>>>(d_pixels,d_Width,d_Height,d_objectList,d_eye,d_lookAt);

// Device-to-host copies. In every call below the source is written as &d_xxx,
// which is the address of the host-side pointer variable, not the device
// buffer that d_xxx points to.
cudaMemcpy((this->objectList),&d_objectList,sizeof(this->objectList),cudaMemcpyDeviceToHost);

cudaMemcpy(this->pixels,&d_pixels,size3,cudaMemcpyDeviceToHost);

// (int *)this->screenWidth reinterprets the integer VALUE as a pointer; the
// host-to-device copies above used &this->screenWidth as the address instead.
// The same applies to screenHeight and objectCount.
cudaMemcpy((int *)this->screenWidth,&d_Width,size2,cudaMemcpyDeviceToHost);

cudaMemcpy((int *)this->screenHeight,&d_Height,size2,cudaMemcpyDeviceToHost);

cudaMemcpy((int *)this->objectCount,&d_objectCount,size2,cudaMemcpyDeviceToHost);

// this->eye and this->lookAt are Float3 objects, so they cannot be converted
// to void* (the reported "no suitable conversion function" errors); the
// earlier copies took their address with &. sizeof(d_eye) / sizeof(d_lookAt)
// is the size of a pointer, not of a Float3.
cudaMemcpy(

line:53   (void *)this->eye,

(void *)&d_eye,sizeof(d_eye),cudaMemcpyDeviceToHost);

line:55  cudaMemcpy(this->lookAt,(void *)&d_lookAt,sizeof(d_lookAt),cudaMemcpyDeviceToHost);

}

// Ray-tracing kernel (the tracing body itself is still a placeholder).
//
// Each thread derives a 2D coordinate (x, y) from its block index, the block
// dimensions and its thread index, and returns early when that coordinate is
// outside the screenWidth x screenHeight range, so a launch grid that
// overshoots the image does no work for the excess threads.
//
// Parameters:
//   out_data      byte buffer the kernel writes to.
//                 NOTE(review): presumably the device pixel buffer; its layout
//                 is not visible here -- confirm against the caller.
//   screenWidth   image width, passed by value.
//   screenHeight  image height, passed by value.
//   objectList    pointer to the scene objects.
//   eye, lookAt   camera vectors, passed by value.
//   objectCount   number of entries in objectList.
__global__ void raytracer( unsigned char *out_data,const int screenWidth,const int screenHeight,Object * objectList,Float3 eye,Float3 lookAt,int objectCount)
{
    // Global coordinate = (block index * threads per block) + index within the
    // block. Using blockIdx here (rather than blockDim times a constant) is
    // what makes different blocks cover different coordinates.
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the image have nothing to do.
    if (x >= screenWidth || y >= screenHeight)
        return;

    // code goes here
}

// Device-side minimum of two floats.
// Returns a only when a < b holds; in every other case (equal values, or a
// comparison involving NaN) b is returned.
__device__ float GPUffminf(float a, float b){
    return (a < b) ? a : b;
}

// Device-side maximum of two floats.
// Returns a only when a > b holds; in every other case (equal values, or a
// comparison involving NaN) b is returned.
__device__ float GPUffmaxf(float a, float b){
    return (a > b) ? a : b;
}

// Euclidean length of the vector a: sqrt(x^2 + y^2 + z^2).
__device__ float GPUmag(Float3 a){
    const float lengthSquared = a.x*a.x+a.y*a.y+a.z*a.z;
    return sqrtf(lengthSquared);
}

// Returns a scaled to unit length.
// A vector whose length is exactly zero cannot be normalized and is returned
// unchanged.
//
// The length is computed inline with sqrtf so that this function calls nothing
// but device-callable code; calling a helper that is not marked __device__
// from here is rejected by nvcc ("calling a host function from a
// __device__/__global__ function is not allowed").
__device__ Float3 GPUnormalize(Float3 a){
    const float length = sqrtf(a.x*a.x + a.y*a.y + a.z*a.z);

    // Guard against division by zero for the null vector.
    if (length == 0.0f)
        return a;

    // One reciprocal, three multiplies; the 'f' suffix keeps the arithmetic in
    // single precision.
    const float invLength = 1.0f / length;

    Float3 unit;
    unit.x = a.x * invLength;
    unit.y = a.y * invLength;
    unit.z = a.z * invLength;
    return unit;
}

// Cross product a x b of two 3-component vectors.
__device__ Float3 GPUcross(Float3 a ,Float3 b){
    // Evaluate the three components first, then pack them into the result.
    const float cx = a.y*b.z-a.z*b.y;
    const float cy = a.z*b.x-a.x*b.z;
    const float cz = a.x*b.y-a.y*b.x;

    Float3 product;
    product.x = cx;
    product.y = cy;
    product.z = cz;
    return product;
}

// Dot product of a and b: the sum of the component-wise products.
__device__  float GPUdot(Float3 a,Float3 b){
    const float sum = a.x*b.x + a.y*b.y + a.z*b.z;
    return sum;
}

// Component-wise difference a - b.
__device__  Float3 GPUsub(Float3 a,Float3 b){
    Float3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}

// Component-wise sum a + b.
__device__ Float3 GPUadd(Float3 a,Float3 b){
    Float3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}

// Scales every component of the vector a by the scalar b.
__device__ Float3 GPUmul(Float3 a,float b){
    Float3 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    return scaled;
}

Can anyone tell me how to solve these errors?

Apart from this, I have a few questions:

* Does the order in which the .cu/.cpp files are compiled matter?

* Should the kernel be invoked only from main.cpp?

* If so, should a .cu file consist only of __global__/__device__ functions?

I’m not going to say that I know the right answer, but it looks to me like you’re calling functions before they’ve been defined. Try putting some function declarations at the top, or in a separate header file and see what happens.

Also, I think the cudaMemcpy calls need (void **), but don’t quote me on that.

Compiling shouldn’t be a huge deal, so long as you use nvcc in the end as your linker.

I think.

  • The order of compilation does not matter at all.

  • A kernel can be invoked from any source file as long as that file has the .cu suffix (this can be changed with nvcc switches). Remember to include a declaration of each kernel you want to launch.

  • A .cu file can also contain host code.