My cuda code works fine before I used dynamic parallelism. To allow calling a child kernel inside a parent kernel, I modified my makefile to be:
#CC=gcc # used C-compiler
CC=g++
CFLAGS=-O3 -fopenmp # release C-Compiler flags
#CFLAGS= -g -Wall -pedantic # debugging C-Compiler flags
INCDIRS=-I/usr/local/cuda/include
LD=g++ # used linker
LFLAGS=-O3 -lstdc++ -std=c++11 -fopenmp -lpthread # linker flags
LIBS=-L. -lm -lz # used libraries
all: mven
clean:
rm -f *.o
rm -f mven
help: info
mven: nsampen.o math.o
$(LD) $(LFLAGS) -L/usr/local/cuda/lib64 -lcuda nsampen.o math.o -o mven $(LIBS) -lcudart
sampen.o: sampenv2.cu mathfunc.h
#$(CC) -c $(CFLAGS) sampenv2.cu -o mven.o
nvcc -std=c++11 -arch=sm_61 -c -dc sampenv2.cu -o sampen.o
nsampen.o: sampen.o math.o
nvcc -arch=sm_61 -dlink -o nsampen.o sampen.o math.o -lcudart
math.o: mathfunc.cpp mathfunc.h
$(CC) -c $(CFLAGS) mathfunc.cpp -o math.o
compiling seems to be fine, but link using g++ met the problem of "undefined reference to main and 3 others. Any suggestions?
Thank you so much!
Ze