Calling kernels from a C file: linking problem

Hello everyone!

I am trying to call several kernels from a C file. I am working with other libraries which do not work properly with .cu files, so I have to use a C file.

c-File linkTest.c:

#include<stdio.h>

#include<stdlib.h>

#include</home/cs071011/cvode_single/examples/myexamples/kernelProt.h>

/*
 * Host-side driver: fills a host array of N floats with 2.0f, passes it
 * through the wrapper functions declared in kernelProt.h (allocate, copy to
 * device, run kernel, copy back) and releases both buffers.
 *
 * Returns 0 on completion, EXIT_FAILURE if the host buffer cannot be
 * allocated.
 *
 * NOTE(review): myCudaMalloc() is declared as taking `float *`, so dV is
 * passed by value and the address obtained inside the wrapper never reaches
 * this function. dV is initialised to NULL so the later calls at least
 * receive a defined value instead of an indeterminate one; actually getting
 * device memory here requires the wrapper to take `float **`, which is a
 * signature change in kernelProt.h and kernelFile.cu.
 */
int main(void) {
	float *hV = NULL, *dV = NULL, c = 2.0f;
	int i;
	const int N = 100000;
	size_t size = N * sizeof(float);

	myCudaMalloc(dV, size);

	hV = (float *) malloc(size);
	if (hV == NULL) {
		/* Without a host buffer there is nothing to copy or compute. */
		fprintf(stderr, "linkTest: could not allocate %lu bytes of host memory\n",
			(unsigned long) size);
		myCudaFree(dV);
		return EXIT_FAILURE;
	}

	for (i = 0; i < N; i++) {
		hV[i] = 2.0f;
	}

	myCpyHost(dV, hV, size);
	myKernelFunc(dV, c, N);
	myCpyDevice(hV, dV, size);

	myCudaFree(dV);
	free(hV);

	return 0;
}

Header file kernelProt.h with function prototypes:

#include<stdio.h>

/* Launches the scaling kernel over N floats at memGpu with factor c (defined in kernelFile.cu). */
void myKernelFunc(float *memGpu, float c, const int N);

/* Copies size bytes from memCpu to memGpu (host-to-device cudaMemcpy in kernelFile.cu). */
void myCpyHost(float *memGpu, float *memCpu, size_t size);

/* Copies size bytes from memGpu to memCpu (device-to-host cudaMemcpy in kernelFile.cu). */
void myCpyDevice(float *memCpu, float *memGpu, size_t size);

/*
 * Requests size bytes of device memory.
 * NOTE(review): memGpu is passed by value, so the caller's pointer variable
 * is not updated by this call.
 */
void myCudaMalloc(float *memGpu, size_t size);

/* Releases memGpu with cudaFree. */
void myCudaFree(float *memGpu);

cu-File kernelFile.cu including the kernels:

#include<stdio.h>

#include<cuda.h>

/*
 * Copies `size` bytes from the host buffer memCpu to the device buffer memGpu.
 *
 * The status returned by cudaMemcpy is checked; a failure is reported on
 * stderr rather than being silently dropped. The function still returns
 * normally so existing callers keep working.
 */
void myCpyHost(float *memGpu, float *memCpu, size_t size) {
	cudaError_t status = cudaMemcpy(memGpu, memCpu, size, cudaMemcpyHostToDevice);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCpyHost: cudaMemcpy (host to device) failed: %s\n",
			cudaGetErrorString(status));
	}
}

/*
 * Copies `size` bytes from the device buffer memGpu to the host buffer memCpu.
 *
 * The status returned by cudaMemcpy is checked; a failure is reported on
 * stderr rather than being silently dropped. The function still returns
 * normally so existing callers keep working.
 */
void myCpyDevice(float *memCpu, float *memGpu, size_t size) {
	cudaError_t status = cudaMemcpy(memCpu, memGpu, size, cudaMemcpyDeviceToHost);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCpyDevice: cudaMemcpy (device to host) failed: %s\n",
			cudaGetErrorString(status));
	}
}

/*
 * Requests `size` bytes of device memory through cudaMalloc.
 *
 * NOTE(review): memGpu is a by-value copy of the caller's pointer, so
 * cudaMalloc writes the new device address into this local parameter only.
 * The caller's variable is left unchanged and the allocation cannot be
 * reached after this function returns. The status returned by cudaMalloc is
 * not inspected.
 */
void myCudaMalloc(float *memGpu, size_t size) {

	/* &memGpu is the address of the local parameter, not of the caller's pointer. */
	cudaMalloc((void **) &memGpu, size);

}

/*
 * Releases the device buffer memGpu with cudaFree.
 *
 * The status returned by cudaFree is checked; a failure is reported on
 * stderr rather than being silently dropped.
 */
void myCudaFree(float *memGpu) {
	cudaError_t status = cudaFree(memGpu);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCudaFree: cudaFree failed: %s\n",
			cudaGetErrorString(status));
	}
}

/*
 * Element-wise scaling kernel: each thread multiplies one element of dx by c.
 *
 * The element index is built from the x components of the block and thread
 * ids only. Threads whose index is N or larger leave dx untouched, so the
 * grid may contain more threads than there are elements.
 */
__global__ void myKernel(float *dx, float c, const int N) {
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if (idx >= N) {
		return;
	}

	dx[idx] = dx[idx] * c;
}

/*
 * Host wrapper that launches myKernel so that the first N floats at memGpu
 * are multiplied by c.
 *
 * Launch layout: 256 threads per block, ceil(N / 256) blocks.
 *
 * N <= 0 is treated as "nothing to do": a grid of zero blocks is not a valid
 * launch configuration. Launch errors are fetched with cudaGetLastError()
 * and reported on stderr. No synchronisation is added here, so errors raised
 * while the kernel executes surface at a later CUDA call.
 */
void myKernelFunc(float *memGpu, float c, const int N) {
	const int NUM_THREADS = 256;
	int NUM_BLOCKS;
	cudaError_t status;

	if (N <= 0) {
		return;
	}

	/* Ceil-division written so that it cannot overflow for N close to INT_MAX. */
	NUM_BLOCKS = (N - 1) / NUM_THREADS + 1;

	myKernel<<<NUM_BLOCKS, NUM_THREADS>>>(memGpu, c, N);

	status = cudaGetLastError();
	if (status != cudaSuccess) {
		fprintf(stderr, "myKernelFunc: kernel launch failed: %s\n",
			cudaGetErrorString(status));
	}
}

I am using a Makefile, which looks like this:

SHELL = /bin/bash

# Install locations used for headers and libraries.
# NOTE(review): presumably the CVODE/SUNDIALS and MPICH2 install trees - confirm.
prefix       = /home/cs071011/cvode_single
mpi          = /home/cs071011/mpich2-install
exec_prefix  = ${prefix}
includedir   = ${prefix}/include
includedir2  = ${mpi}/include
libdir       = ${exec_prefix}/lib

# Additional libraries needed by the project
CPP      = cc -E
CPPFLAGS =
CC       = cc
CFLAGS   = -g -pg -O2
LDFLAGS  =

# The nvcc-built object kernelFile.o is handed to the linker by absolute path
# through LIBS. This Makefile has no rule that builds it, so it must already
# exist (e.g. from `nvcc -c kernelFile.cu`) before `make` is run.
LIBS     = -L/home/cs071011/cuda-inst/cuda/lib /home/cs071011/cuda-inst/cuda/include/kernelFile.o -l cublas -l cudart -lm

INCLUDES = -I${includedir} -I${includedir2}
LIBRARIES = -lsundials_cvode -lsundials_nvecserial ${LIBS}
LIBRARIES_BL =

# One object file is derived from every name listed in EXAMPLES.
EXAMPLES = linkTest
OBJECTS = ${EXAMPLES:=.o}

# Not referenced by any rule in this file.
NVECTORPATH = /home/cs071011/cvode-2.6.0/src/nvec_ser
CVODEPATH = /home/cs071011/cvode-2.6.0

# -----------------------------------------------------------------------------------------

.SUFFIXES : .o .c

# Compile each C source into an object file.
.c.o :
	${CC} ${CPPFLAGS} ${CFLAGS} ${INCLUDES} -c $<

# -----------------------------------------------------------------------------------------

# `all` and `clean` do not name files.
.PHONY: all clean

# Link every example. The executable is always written to a.out, so with more
# than one example only the last link result survives. The loop stops at the
# first failing link so that make reports the error.
all: ${OBJECTS}
	@for i in ${EXAMPLES}; do \
		echo "${CC} -o a.out $${i}.o ${CFLAGS} ${LDFLAGS} -L${libdir} ${LIBRARIES} ${LIBRARIES_BL}"; \
		${CC} -o a.out $${i}.o ${CFLAGS} ${LDFLAGS} -L${libdir} ${LIBRARIES} ${LIBRARIES_BL} || exit 1; \
	done

# Remove build products, including the a.out written by `all`.
clean:
	rm -f ${OBJECTS}
	rm -f ${EXAMPLES} a.out

# -----------------------------------------------------------------------------------------

I have added the path of the object (/home/cs071011/cuda-inst/cuda/include/kernelFile.o) to the LIBS thing, but there might be still a linking problem.

Shell output, when error occurs:

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ make

cc -o a.out linkTest.o -g -pg -O2 -L/home/cs071011/cvode_single/lib -lsundials_cvode -lsundials_nvecserial -L/home/cs071011/cuda-inst/cuda/lib /home/cs071011/cuda-inst/cuda/include/kernelFile.o -l cublas -l cudart -lm

linkTest.o: In function `main’:

/home/cs071011/cvode_single/examples/myexamples/linkTest.c:14: undefined reference to `myCudaMalloc’

/home/cs071011/cvode_single/examples/myexamples/linkTest.c:22: undefined reference to `myCpyHost’

/home/cs071011/cvode_single/examples/myexamples/linkTest.c:24: undefined reference to `myKernelFunc’

/home/cs071011/cvode_single/examples/myexamples/linkTest.c:26: undefined reference to `myCpyDevice’

/home/cs071011/cvode_single/examples/myexamples/linkTest.c:28: undefined reference to `myCudaFree’

collect2: ld gab 1 als Ende-Status zurück

make: *** [all] Fehler 1

Can somebody help me out?

Regards.

AL.

You need to compile your kernelFile.cu with nvcc and pass the resulting object file to the linker. Your Makefile won’t work with CUDA without changes.

Thank you for your response.

I actually did compile my kernelFile.cu with nvcc -c kernelFile.cu and added the path of the object file to the makefile. So how do I pass the object to the linker correctly? What should the corresponding line in the makefile look like?

Thanks for further assistances.

Regards.
AL

Sorry, I did not see that the object file created by nvcc is in the linkline.

So the linkline looks correct, but the makefile seems not. The linkline is executed before the object file of linkTest.c is compiled. And adding the cuda-object file of nvcc in the LIBS variable is not very feasible. And the function “myKernelFunc()” simply does not exist. Try to compile your application by hand:

cc -c linkTest.c -o linkTest.o

nvcc -c kernelFile.cu -o kernelFile.o

cc linkTest.o kernelFile.o -o a.out -lcudart

That won’t work and result in one undefined reference to “myKernelFunc”, but if the other errors disappear you’re on the right way. Then revisit your Makefile.

Oh, i see the myKernelFunc is missing, i added it to my kernelFile.cu :">

I did compile my application by hand, the result looks like this:

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ cc -c linkTest.c -o linkTest.o

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ nvcc -c kernelFile.cu -o kernelFile.o

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ cc linkTest.o kernelFile.o -o a.out -L cudart

linkTest.o: In function `main':

linkTest.c:(.text+0x2d): undefined reference to `myCudaMalloc'

linkTest.c:(.text+0x77): undefined reference to `myCpyHost'

linkTest.c:(.text+0x88): undefined reference to `myKernelFunc'

linkTest.c:(.text+0x99): undefined reference to `myCpyDevice'

linkTest.c:(.text+0xa2): undefined reference to `myCudaFree'

kernelFile.o: In function `__sti____cudaRegisterAll_45_tmpxft_00003b28_00000000_4_k

ernelFile_cpp1_ii_11b6d742':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0x93e4): undefined reference to `__cudaRegisterFatBinary'

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0x943f): undefined reference to `__cudaRegisterFunction'

kernelFile.o: In function `__device_stub__Z8myKernelPffi':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0x9497): undefined reference to `cudaSetupArgument'

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0x94b6): undefined reference to `cudaSetupArgument'

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0x94d5): undefined reference to `cudaSetupArgument'

kernelFile.o: In function `__ftexfetch':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa0fa): undefined reference to `__cudaTextureFetch'

kernelFile.o: In function `__utexfetch':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa160): undefined reference to `__cudaTextureFetch'

kernelFile.o: In function `__itexfetch':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa1b1): undefined reference to `__cudaTextureFetch'

kernelFile.o: In function `__ftexfetchi':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa1f5): undefined reference to `__cudaTextureFetch'

kernelFile.o: In function `__utexfetchi':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa24e): undefined reference to `__cudaTextureFetch'

kernelFile.o:tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa292): more undefined references to `__cudaTextureFetch' follow

kernelFile.o: In function `__ullAtomicCAS':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa2ca): undefined reference to `__cudaMutexOperation'

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa306): undefined reference to `__cudaMutexOperation'

kernelFile.o: In function `__ullAtomicExch':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa326): undefined reference to `__cudaMutexOperation'

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa346): undefined reference to `__cudaMutexOperation'

kernelFile.o: In function `__ullAtomicAdd':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa366): undefined reference to `__cudaMutexOperation'

kernelFile.o:tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xa38e): more undefined references to `__cudaMutexOperation' follow

kernelFile.o: In function `__cudaUnregisterBinaryUtil':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe1a6): undefined reference to `__cudaUnregisterFatBinary'

kernelFile.o: In function `myKernelFunc(float*, float, int)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe229): undefined reference to `cudaConfigureCall'

kernelFile.o: In function `myCudaFree(float*)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe255): undefined reference to `cudaFree'

kernelFile.o: In function `myCudaMalloc(float*, unsigned long)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe274): undefined reference to `cudaMalloc'

kernelFile.o: In function `myCpyDevice(float*, float*, unsigned long)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe2a0): undefined reference to `cudaMemcpy'

kernelFile.o: In function `myCpyHost(float*, float*, unsigned long)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text+0xe2cc): undefined reference to `cudaMemcpy'

kernelFile.o: In function `cudaError cudaLaunch<char>(char*)':

tmpxft_00003b28_00000000-11_kernelFile.ii:(.text._Z10cudaLaunchIcE9cudaErrorPT_[cudaError cudaLaunch<char>(char*)]+0x11): undefined reference to `cudaLaunch'

kernelFile.o:(.eh_frame+0x12): undefined reference to `__gxx_personality_v0'

collect2: ld gab 1 als Ende-Status zurück

A lot of more errors :ph34r:

How to handle these strange errors " undefined reference to `__cudaRegisterFatBinary’"?

Regards.

AL

You used -L instead of -l (small L) when linking. So all the cuda runtime library functions are unknown. (I suppose behind cc, gcc is used)

Maybe your undefined reference errors are a name mangling problem. ‘cc’ interprets the functions as ANSI/ISO C and nvcc as C++ functions. Try that:

cc -c linkTest.c -o linkTest.o

nvcc --host-compilation C -c kernelFile.cu -o kernelFile.o

cc linkTest.o kernelFile.o -o a.out -l cudart

Hm i got some problems with the location of the cudart library:

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ cc linkTest.o kernelFile.o -o a.out -l cudart

/usr/bin/ld: cannot find -lcudart

collect2: ld gab 1 als Ende-Status zurück

The libcudart.so is actually located in /home/cs071011/cuda-inst/cuda/lib, which is not a directory that /usr/bin/ld searches. Do you know how to tell the linker to search for the cudart library in this directory?

Regards.

AL

-l tells the linker that you need this library. -L tells the linker an additional directory where the library could be located:

cc -c linkTest.c -o linkTest.o

nvcc --host-compilation C -c kernelFile.cu -o kernelFile.o

cc linkTest.o kernelFile.o -o a.out -L /home/cs071011/cuda-inst/cuda/lib -l cudart

First of all thank you very much for spending your time in helping me :)

I did the compilation per hand the way you told me, result:

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ cc -c linkTest.c -o linkTest.o

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ nvcc --host-compilation C -c kernelFile.cu -o kernelFile.o

cs071011@simlab17ubuntu:~/cvode_single/examples/myexamples$ cc linkTest.o kernelFile.o -o a.out -L /home/cs071011/cuda-inst/cuda/lib -l cudart

linkTest.o: In function `main':

linkTest.c:(.text+0x2d): undefined reference to `myCudaMalloc'

linkTest.c:(.text+0x77): undefined reference to `myCpyHost'

linkTest.c:(.text+0x88): undefined reference to `myKernelFunc'

linkTest.c:(.text+0x99): undefined reference to `myCpyDevice'

linkTest.c:(.text+0xa2): undefined reference to `myCudaFree'

collect2: ld gab 1 als Ende-Status zurück

Still the same error, got any other ideas how to solve this problem? What does the “(.text+0x2d)” thing mean?

Regards.

AL

I suppose the position where the function is called.

And it is a name mangling problem. Add #include "kernelProt.h" to your kernelFile.cu and change your kernelProt.h to:

/*
 * When this header is read by a C++ compiler (__cplusplus defined), the
 * declarations below are wrapped in extern "C" so they get C linkage. A C
 * compiler skips both guarded parts and sees plain declarations.
 */
#ifdef __cplusplus

extern "C"

{

#endif

/* Launches the scaling kernel over N floats at memGpu with factor c. */
void myKernelFunc(float *memGpu, float c, const int N);

/* Copies size bytes from the host buffer memCpu to the device buffer memGpu. */
void myCpyHost(float *memGpu, float *memCpu, size_t size);

/* Copies size bytes from the device buffer memGpu to the host buffer memCpu. */
void myCpyDevice(float *memCpu, float *memGpu, size_t size);

/* Requests size bytes of device memory; memGpu is received by value. */
void myCudaMalloc(float *memGpu, size_t size);

/* Releases the device buffer memGpu. */
void myCudaFree(float *memGpu);

/* Closes the extern "C" block opened above (C++ only). */
#ifdef __cplusplus

}

#endif

But I have no idea why "--host-compilation C" did not help. It seems that although the host-compilation flag is set to C instead of C++, nvcc mangles the function names in the C++ way. This is not what I would expect, thus it is (maybe) an nvcc bug!

You have 2 possibilities: simply use g++ instead of gcc, or you need to set extern "C" in the header file as shown above.

Changed my kernelProt.h as you suggested, still the same result :( But thanks for your efforts; an nvcc bug would not be cool.

Regards.
AL

Did you add #include "kernelProt.h" in your kernelFile.cu? If you did, post the content of your 3 files.

Hello Tobi!

Sorry for my late response, I was quite busy. You were right, the #include "kernelProt.h" was missing and there is no more error now. Anyway, the result is still unsatisfying.

cs071011@simlab18ubuntu:~/cvode_single/examples/myexamples$ cc -c linkTest.c -o linkTest.o

cs071011@simlab18ubuntu:~/cvode_single/examples/myexamples$ nvcc --host-compilation C -c kernelFile.cu -o kernelFile.o

cs071011@simlab18ubuntu:~/cvode_single/examples/myexamples$ cc linkTest.o kernelFile.o -o a.out -L /home/cs071011/cuda-inst/cuda/lib -l cudart

cs071011@simlab18ubuntu:~/cvode_single/examples/myexamples$ ./a.out 

device memory allocated

data copied from CPU to GPU

launching kernel ..

data copied from GPU to CPU

2.000000

device memory freed

When running a.out, the screen briefly turns black and the result of 2.0 is wrong; it should be 4.0. Here again are the 3 files:

linkTest.c:

#include<stdio.h>

#include<stdlib.h>

#include</home/cs071011/cvode_single/examples/myexamples/kernelProt.h>

/*
 * Host-side driver: fills a host array of N floats with 2.0f, passes it
 * through the wrapper functions declared in kernelProt.h (allocate, copy to
 * device, run kernel, copy back), prints the first element and releases both
 * buffers.
 *
 * Returns 0 on completion, EXIT_FAILURE if the host buffer cannot be
 * allocated.
 *
 * NOTE(review): myCudaMalloc() is declared as taking `float *`, so dV is
 * passed by value and the address obtained inside the wrapper never reaches
 * this function. dV is initialised to NULL so the later calls at least
 * receive a defined value instead of an indeterminate one; actually getting
 * device memory here requires the wrapper to take `float **`, which is a
 * signature change in kernelProt.h and kernelFile.cu.
 */
int main() {
	float *hV = NULL, *dV = NULL, c = 2.0f;
	int i;
	const int N = 1024 * 512;
	size_t size = N * sizeof(float);

	myCudaMalloc(dV, size);

	hV = (float *) malloc(size);
	if (hV == NULL) {
		/* Without a host buffer there is nothing to copy or compute. */
		fprintf(stderr, "linkTest: could not allocate %lu bytes of host memory\n",
			(unsigned long) size);
		myCudaFree(dV);
		return EXIT_FAILURE;
	}

	for (i = 0; i < N; i++) {
		hV[i] = 2.0f;
	}

	myCpyHost(dV, hV, size);
	myKernelFunc(dV, c, N);
	myCpyDevice(hV, dV, size);

	/* Every element started as 2.0f, so the first one shows whether the kernel ran. */
	printf("\n%lf\n", hV[0]);

	myCudaFree(dV);
	free(hV);

	return 0;
}

kernelFile.cu:

#include<stdio.h>

#include<cuda.h>

#include</home/cs071011/cvode_single/examples/myexamples/kernelProt.h>

/*
 * Copies `size` bytes from the host buffer memCpu to the device buffer memGpu
 * and prints a progress message.
 *
 * The status returned by cudaMemcpy is checked: on failure the error is
 * reported on stderr and the success message is not printed, so the output
 * no longer claims a copy that did not happen.
 */
void myCpyHost(float *memGpu, float *memCpu, size_t size) {
	cudaError_t status = cudaMemcpy(memGpu, memCpu, size, cudaMemcpyHostToDevice);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCpyHost: cudaMemcpy (host to device) failed: %s\n",
			cudaGetErrorString(status));
		return;
	}

	printf("\ndata copied from CPU to GPU\n");
}

/*
 * Copies `size` bytes from the device buffer memGpu to the host buffer memCpu
 * and prints a progress message.
 *
 * The status returned by cudaMemcpy is checked: on failure the error is
 * reported on stderr and the success message is not printed, so the output
 * no longer claims a copy that did not happen.
 */
void myCpyDevice(float *memCpu, float *memGpu, size_t size) {
	cudaError_t status = cudaMemcpy(memCpu, memGpu, size, cudaMemcpyDeviceToHost);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCpyDevice: cudaMemcpy (device to host) failed: %s\n",
			cudaGetErrorString(status));
		return;
	}

	printf("\ndata copied from GPU to CPU");
}

/*
 * Requests `size` bytes of device memory through cudaMalloc and prints a
 * progress message.
 *
 * NOTE(review): memGpu is a by-value copy of the caller's pointer, so
 * cudaMalloc writes the new device address into this local parameter only.
 * The caller's variable is left unchanged and the allocation cannot be
 * reached after this function returns. The status returned by cudaMalloc is
 * not inspected, so the message below is printed regardless of the outcome.
 */
void myCudaMalloc(float *memGpu, size_t size) {

	/* &memGpu is the address of the local parameter, not of the caller's pointer. */
	cudaMalloc((void **) &memGpu, size);

	printf("\ndevice memory allocated\n");

}

/*
 * Releases the device buffer memGpu with cudaFree and prints a progress
 * message.
 *
 * The status returned by cudaFree is checked: on failure the error is
 * reported on stderr and the success message is not printed.
 */
void myCudaFree(float *memGpu) {
	cudaError_t status = cudaFree(memGpu);

	if (status != cudaSuccess) {
		fprintf(stderr, "myCudaFree: cudaFree failed: %s\n",
			cudaGetErrorString(status));
		return;
	}

	printf("\ndevice memory freed");
}

/*
 * Element-wise scaling kernel: each thread multiplies one element of dx by c.
 *
 * The element index is built from the x components of the block and thread
 * ids only. Threads whose index is N or larger leave dx untouched, so the
 * grid may contain more threads than there are elements.
 */
__global__ void myKernel(float *dx, float c, const int N) {
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if (idx >= N) {
		return;
	}

	dx[idx] = dx[idx] * c;
}

/*
 * Host wrapper that launches myKernel so that the first N floats at memGpu
 * are multiplied by c, printing a progress message before the launch.
 *
 * Launch layout: 256 threads per block, ceil(N / 256) blocks.
 *
 * N <= 0 is treated as "nothing to do": a grid of zero blocks is not a valid
 * launch configuration, so no launch is attempted and nothing is printed.
 * Launch errors are fetched with cudaGetLastError() and reported on stderr.
 * No synchronisation is added here, so errors raised while the kernel
 * executes surface at a later CUDA call.
 */
void myKernelFunc(float *memGpu, float c, const int N) {
	const int NUM_THREADS = 256;
	int NUM_BLOCKS;
	cudaError_t status;

	if (N <= 0) {
		return;
	}

	/* Ceil-division written so that it cannot overflow for N close to INT_MAX. */
	NUM_BLOCKS = (N - 1) / NUM_THREADS + 1;

	printf("launching kernel ..");
	myKernel<<<NUM_BLOCKS, NUM_THREADS>>>(memGpu, c, N);

	status = cudaGetLastError();
	if (status != cudaSuccess) {
		fprintf(stderr, "myKernelFunc: kernel launch failed: %s\n",
			cudaGetErrorString(status));
	}
}

kernelProt.h:

/*
 * kernelProt.h - prototypes of the host-callable wrappers implemented in
 * kernelFile.cu.
 *
 * The include guard makes repeated inclusion harmless. The extern "C" block
 * gives the functions C linkage when the header is read by a C++ compiler,
 * so that a C translation unit and the nvcc-compiled one agree on the symbol
 * names.
 */
#ifndef KERNELPROT_H
#define KERNELPROT_H

#include <stddef.h>	/* size_t */
#include <stdio.h>

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Requests size bytes of device memory.
 * NOTE(review): memGpu is passed by value, so the caller's pointer variable
 * is not updated by this call.
 */
void myCudaMalloc(float *memGpu, size_t size);

/* Copies size bytes from the host buffer memCpu to the device buffer memGpu. */
void myCpyHost(float *memGpu, float *memCpu, size_t size);

/* Launches the scaling kernel over N floats at memGpu with factor c. */
void myKernelFunc(float *memGpu, float c, const int N);

/* Copies size bytes from the device buffer memGpu to the host buffer memCpu. */
void myCpyDevice(float *memCpu, float *memGpu, size_t size);

/* Releases the device buffer memGpu. */
void myCudaFree(float *memGpu);

#ifdef __cplusplus
}
#endif

#endif /* KERNELPROT_H */

Just removed the myKernelFunc(dV, c, N); line in my linkTest.c file - no black screen, so there seem to be some problems with my kernel.

“myCudaMalloc” is wrong, modify as

/*
 * Allocates `size` bytes of device memory and stores the device address in
 * *memGpu, so the caller's pointer variable is updated.
 *
 * memGpu must point to the caller's pointer variable; a NULL memGpu is
 * reported and ignored. If cudaMalloc fails, *memGpu is set to NULL, the
 * error is reported on stderr and the success message is not printed.
 */
void myCudaMalloc(float **memGpu, size_t size) {
	cudaError_t status;

	if (memGpu == NULL) {
		fprintf(stderr, "myCudaMalloc: output pointer is NULL\n");
		return;
	}

	status = cudaMalloc((void **) memGpu, size);
	if (status != cudaSuccess) {
		/* Leave the caller with a well-defined value instead of a stale one. */
		*memGpu = NULL;
		fprintf(stderr, "myCudaMalloc: cudaMalloc of %lu bytes failed: %s\n",
			(unsigned long) size, cudaGetErrorString(status));
		return;
	}

	printf("\ndevice memory allocated\n");
}

and in linkTest.c (with the prototype in kernelProt.h changed to match), use

myCudaMalloc( &dV, size);

Thank you very much LSChien, it finally works! :)

Also big thanks to tobi!

Regards.
AL