Hi all. I am trying to port an application to CUDA. One portion of code has a circular dependence (A.cu depends on B.cpp which depends on A.cu). Normally in C/C++ this can be solved with object linking by putting a forward declaration in the .hpp file and the definition/include in the cpp file. Below is an example of what I’m talking about
B.hpp:
class classA; //Forward declaration
...
class classB {
__device__ classA method1()
__device__ void method2()
}
B.cpp:
#include "B.hpp"
#include "A.hpp" //Include here for linker
...
__device__ classA classB::method1() { ... }
__device__ void classB::method2() { ... }
I tried this in the release version of CUDA, 4.2, and it doesn’t work, presumably because object linking was not yet added. In CUDA5 object linking was supposedly added, but I can’t figure out how to set up the Makefile to link to generated object files. I am not very experienced with setting up makefiles, and have been using the SDK makefile templates so far. Any help is much appreciated.
I tried just copying over a CUDA5 makefile, but I am getting various errors
#
# Build script for project
#
################################################################################
# OS Name (Linux or Darwin)
OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
OSLOWER = $(shell uname -s 2>/dev/null | tr [:upper:] [:lower:])
# Flags to detect 32-bit or 64-bit OS platform
OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/")
OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/")
# Flags to detect either a Linux system (linux) or Mac OSX (darwin)
DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
# Location of the CUDA Toolkit binaries and libraries
CUDA_PATH ?= /usr/local/cuda
CUDA_INC_PATH ?= $(CUDA_PATH)/include
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
CUDA_BIN_PATH ?= $(CUDA_PATH)/bin
# Common binaries
NVCC ?= $(CUDA_BIN_PATH)/nvcc
GCC ?= g++
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
# CUDA code generation flags
GENCODE_SM10 := -gencode arch=compute_10,code=sm_10
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30
GENCODE_FLAGS := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30)
# OS-specific build flags
ifneq ($(DARWIN),)
LDFLAGS := -Xlinker -rpath $(CUDA_LIB_PATH) -L$(CUDA_LIB_PATH) -lcudart
CCFLAGS := -arch $(OS_ARCH)
else
ifeq ($(OS_SIZE),32)
LDFLAGS := -L$(CUDA_LIB_PATH) -lcudart
CCFLAGS := -m32
else
LDFLAGS := -L$(CUDA_LIB_PATH)64 -lcudart
CCFLAGS := -m64
endif
endif
# OS-architecture specific flags
ifeq ($(OS_SIZE),32)
NVCCFLAGS := -m32
else
NVCCFLAGS := -m64
endif
# OpenGL specific libraries
ifneq ($(DARWIN),)
# Mac OSX specific libraries and paths to include
LIBPATH_OPENGL := -L../../common/lib/$(OSLOWER) -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU -framework GLUT ../../common/lib/$(OSLOWER)/libGLEW.a
else
# Linux specific libraries and paths to include
# LIBPATH_OPENGL := -L../../common/lib/$(OSLOWER)/$(OS_ARCH) -L/usr/X11R6/lib -lGL -lGLU -lX11 -lXi -lXmu -lglut ../../common/lib/$(OSLOWER)/$(OS_ARCH)/libGLEW.a
LIBPATH_OPENGL := -L../../common/lib/$(OSLOWER)/$(OS_ARCH) -L/usr/X11R6/lib -lGL -lGLU -lX11 -lXi -lXmu -lglut ../../common/lib/$(OSLOWER)/libGLEW_x86_64.a
endif
ptxas := 1
maxregisters := 32
# Debug build flags
ifeq ($(dbg),1)
CCFLAGS += -g
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ifdef maxregisters
NVCCFLAGS += -maxrregcount $(maxregisters)
endif
ifeq ($(ptxas), 1)
NVCCFLAGS += --ptxas-options=-v
endif
# Common includes and paths for CUDA, CURAND
INCLUDES := -I$(CUDA_INC_PATH) -I. -I.. -I../../common/inc -I../../../shared/inc -I../common/FreeImage/include
LDFLAGS += $(LIBPATH_OPENGL) -lcurand
The project-specific portion of the Makefile
# Target rules
all: build
build: A
B.o: B.cpp
$(NVCC) $(NVCCFLAGS) $(GENCODE_SM20) $(INCLUDES) -o $@ -c $<
A.o: A.cu
$(NVCC) $(NVCCFLAGS) $(GENCODE_SM20) $(INCLUDES) -o $@ -c $<
A: A.o B.o
$(NVCC) $(NVCCFLAGS) $(GENCODE_SM20) $(INCLUDES) -o $@ $+ $(LDFLAGS) $(EXTRA_LDFLAGS)
mkdir -p ../../bin/$(OSLOWER)/$(TARGET)
cp $@ ../../bin/$(OSLOWER)/$(TARGET)
run: build
./A
clean:
rm -f A A.o B.o
With this I get the error when it tries to compile A.o:
>make A.o
...
ptxas fatal : Unresolved extern function '_ZN6classBC1E9classAP7'
Then after I added the -dlink flag to the library compilation steps both object files compile, but the full project build fails
>make A
...
nvlink error : Undefined reference to '_ZNK6classB13method1Ev'
nvlink error : Undefined reference to '_ZN6classB10method2Ev'
Any help is much appreciated