Makefile template for Cocoa/CUDA projects — Adopting the CUDA makefile template for Cocoa

I have ported a big CUDA flame fractal rendering project (flam4) from Windows to the Mac using Objective-C/C++ and Apple’s Cocoa GUI API.

(See http://sourceforge.net/projects/flam4/). I ended up using two Xcode projects: one to create a dynamic library (.dylib) that encapsulates the code processed by CUDA's nvcc, and one that provides a typical Mac GUI app that invokes the CUDA-based dylib.

The dynamic library is built using a modified “common.mk” (based on NVIDIA’s SDK source) and a Makefile example (based again on NVIDIA’s SDK examples). The original NVIDIA common.mk does not allow you to build Mac dynamic libraries or use Objective-C/C++ source files. I was able to extend the original common.mk file to support more Mac OS X development options.

The Makefile I used:

[codebox]# Add source files here

# Name of the dynamic library to build; common.mk inserts _$(LIB_ARCH) in
# front of the .dylib extension when it derives the final target name.
DYNAMIC_LIB := libFlam4CUDACore.dylib

SRCDIR  := ./
ROOTDIR := .

# Cuda source files (compiled with cudacc)
CUFILES_sm_10 := Flam4.cu

# Objective-C++ source files
MMFILES := WorkerPThread.mm Flam4CUDA_PThreading.mm Flam4CUDACore.mm

# Additional compiler flags and LIBs to include
FRAMEWORKS := OpenGL Cocoa AppKit

# shows the actual commands being run by the makefile
verbose := 1

################################################################################
# Rules and targets
include common.mk

[/codebox]

The modified common.mk is attached:

I don't see the common.mk attachment anywhere, so here it is:

[codebox]###################################################

#############################

# Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
#
# NVIDIA Corporation and its licensors retain all intellectual property and
# proprietary rights in and to this software and related documentation.
# Any use, reproduction, disclosure, or distribution of this software
# and related documentation without an express license agreement from
# NVIDIA Corporation is strictly prohibited.
#
# Please refer to the applicable NVIDIA end user license agreement (EULA)
# associated with this source code for terms and conditions that govern
# your use of this NVIDIA software.

############################################################

####################

# Common build script for CUDA source projects for Linux and Mac platforms

############################################################

####################

# Environment variables used by Xcode Build process
#   OBJECT_FILE_DIR
#   BUILD_DIR
#   CONFIGURATION

# Register the CUDA and Objective-C/C++ file extensions used by this build
# with make's suffix list.
.SUFFIXES : .cu .cu_dbg.o .c_dbg.o .cpp_dbg.o .cu_rel.o .c_rel.o .cpp_rel.o .cubin .ptx .m .mm

# Add new SM Versions here as devices with new Compute Capability are released
SM_VERSIONS := sm_10 sm_11 sm_12 sm_13

# Default toolkit location; `make cuda-install=/some/path` takes precedence.
CUDA_INSTALL_PATH ?= /usr/local/cuda
ifdef cuda-install
CUDA_INSTALL_PATH := $(cuda-install)
endif

# detect OS
# Simple (:=) assignment so each shell command runs once at parse time rather
# than on every expansion.  The tr character classes are quoted so the shell
# cannot glob-expand them against single-letter file names.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')

# 'linux' is output for Linux system, 'darwin' for OS X
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))
ifneq ($(DARWIN),)
# Non-empty when the system version plist mentions 10.6 (Snow Leopard).
SNOWLEOPARD := $(strip $(findstring 10.6, $(shell egrep "10.6" /System/Library/CoreServices/SystemVersion.plist)))
endif

# detect 32-bit or 64-bit platform
# HP_64 is empty when `uname -m` does not contain "64".
HP_64  := $(shell uname -m | grep 64)
OSARCH := $(shell uname -m)

# Basic directory setup for SDK
# (override directories only if they are not already defined)
SRCDIR     ?= ./
ROOTDIR    ?= .
ROOTBINDIR ?= $(ROOTDIR)/bin
BINDIR     ?= $(ROOTBINDIR)/$(OSLOWER)
# ?= so a project Makefile can point at an SDK "common" directory that is not
# located under its own ROOTDIR.
COMMONDIR  ?= $(ROOTDIR)/common

# Library output directory: use Xcode's build location when it is provided,
# otherwise fall back to $(ROOTDIR)/lib (the unconditional assignment used
# before would evaluate to "/" outside Xcode).
ifneq ($(strip $(BUILD_DIR)),)
LIBDIR := $(BUILD_DIR)/$(CONFIGURATION)
else
LIBDIR := $(ROOTDIR)/lib
endif

# Object files go under ./obj unless the caller presets ROOTOBJDIR.
# NOTE(review): this was followed by `ROOTOBJDIR ?= $(OBJECT_FILE_DIR)`, which
# could never take effect because the variable is already defined at that
# point; the effective behaviour (obj) is kept.  Confirm whether Xcode's
# OBJECT_FILE_DIR was meant to win.
ROOTOBJDIR ?= obj

# Compilers
NVCC   := $(CUDA_INSTALL_PATH)/bin/nvcc
CXX    := g++
CC     := gcc
# The link drivers follow $(CXX) so a command-line CXX override is used for
# linking as well; with the assignment above they still expand to "g++ -fPIC".
LINK   := $(CXX) -fPIC
DYLINK := $(CXX) -fPIC

# Includes
INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc

# Warning flags
# Each list is a single logical line: every physical line except the last ends
# in a backslash and no blank line may interrupt the continuation.
# $(SPACE) is not defined in the visible part of this file, so it is expected
# to expand to nothing and merely terminates the list.
CXXWARN_FLAGS := \
    -W -Wall \
    -Wimplicit \
    -Wswitch \
    -Wformat \
    -Wchar-subscripts \
    -Wparentheses \
    -Wmultichar \
    -Wtrigraphs \
    -Wpointer-arith \
    -Wcast-align \
    -Wreturn-type \
    -Wno-unused-function \
    $(SPACE)

# C builds use the C++ warning set plus the C-only prototype/declaration checks.
CWARN_FLAGS := $(CXXWARN_FLAGS) \
    -Wstrict-prototypes \
    -Wmissing-prototypes \
    -Wmissing-declarations \
    -Wnested-externs \
    -Wmain

# architecture flag for nvcc and gcc compilers build
CUBIN_ARCH_FLAG :=
CXX_ARCH_FLAGS  :=
NVCCFLAGS       :=
LIB_ARCH        := $(OSARCH)

# set NVCC to use ptxas verbose mode so we can see how each kernel uses device memory/registers
NVCCFLAGS += --ptxas-options=-v

# Determining the necessary Cross-Compilation Flags
# Precedence: an explicit x86_64=1, then an explicit i386=1, then the Snow
# Leopard default (32-bit).  On any other host no extra flags are added and
# LIB_ARCH keeps the value it already has.
ifeq ($(x86_64),1)
  # 32-bit OS, but we target 64-bit cross compilation
  NVCCFLAGS += -m64
  LIB_ARCH  := x86_64
  ifneq ($(DARWIN),)
    CXX_ARCH_FLAGS += -arch x86_64
  else
    CXX_ARCH_FLAGS += -m64
  endif
else ifeq ($(i386),1)
  # 64-bit OS, and we target 32-bit cross compilation
  NVCCFLAGS += -m32
  LIB_ARCH  := i386
  ifneq ($(DARWIN),)
    CXX_ARCH_FLAGS += -arch i386
  else
    CXX_ARCH_FLAGS += -m32
  endif
else ifneq ($(SNOWLEOPARD),)
  # No explicit request on Snow Leopard: build 32-bit.
  NVCCFLAGS      += -m32
  CXX_ARCH_FLAGS += -arch i386 -m32
  LIB_ARCH       := i386
endif

# Compiler-specific flags
CXXFLAGS := $(CXXWARN_FLAGS) $(CXX_ARCH_FLAGS)
CFLAGS   := $(CWARN_FLAGS) $(CXX_ARCH_FLAGS)
LINK     += $(CXX_ARCH_FLAGS)
DYLINK   += $(CXX_ARCH_FLAGS)

# This option for Mac allows CUDA applications to work without requiring to set DYLD_LIBRARY_PATH
ifneq ($(DARWIN),)
  LINK   += -Xlinker -rpath $(CUDA_INSTALL_PATH)/lib -Xlinker -no_compact_linkedit
  DYLINK += -Xlinker -rpath $(CUDA_INSTALL_PATH)/lib -Xlinker -no_compact_linkedit
endif

# Common flags
COMMONFLAGS += $(INCLUDES) -DUNIX

# Debug/release configuration
# Both configurations append "-x objective-c++" / "-x objective-c", so the host
# compiler is told to treat its inputs as Objective-C++ / Objective-C.
ifeq ($(dbg),1)
  COMMONFLAGS += -g
  NVCCFLAGS   += -D_DEBUG
  CXXFLAGS    += -D_DEBUG -x objective-c++
  CFLAGS      += -D_DEBUG -x objective-c
  BINSUBDIR   := debug
  # Debug libraries deliberately carry no "D" suffix here.
  #LIBSUFFIX  := D
  LIBSUFFIX   :=
else
  COMMONFLAGS += -O2
  BINSUBDIR   := release
  LIBSUFFIX   :=
  NVCCFLAGS   += --compiler-options -fno-strict-aliasing
  CXXFLAGS    += -fno-strict-aliasing -x objective-c++
  CFLAGS      += -fno-strict-aliasing -x objective-c
endif

# architecture flag for cubin build
CUBIN_ARCH_FLAG :=

# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
ifeq ($(USEGLLIB),1)
  ifneq ($(DARWIN),)
    OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries
    OPENGLLIB += -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
  else
    # this case for linux platforms
    OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu

    # check if x86_64 flag has been set, otherwise, check HP_64 is i386/x86_64
    ifeq ($(x86_64),1)
      OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
    else
      ifeq ($(i386),)
        ifeq "$(strip $(HP_64))" ""
          OPENGLLIB += -lGLEW -L/usr/X11R6/lib
        else
          OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
        endif
      endif
    endif

    # check if i386 flag has been set, otherwise check HP_64 is i386/x86_64
    # NOTE(review): when neither x86_64 nor i386 is set, this section and the
    # one above both append the host-selected GLEW flags, so they appear twice.
    ifeq ($(i386),1)
      OPENGLLIB += -lGLEW -L/usr/X11R6/lib
    else
      ifeq ($(x86_64),)
        ifeq "$(strip $(HP_64))" ""
          OPENGLLIB += -lGLEW -L/usr/X11R6/lib
        else
          OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
        endif
      endif
    endif
  endif
endif

# GLUT: a framework on OS X, a plain library elsewhere.
ifeq ($(USEGLUT),1)
  ifneq ($(DARWIN),)
    OPENGLLIB += -framework GLUT
  else
    # An explicit architecture request adds the matching library directory.
    ifeq ($(x86_64),1)
      OPENGLLIB += -lglut -L/usr/lib64
    endif
    ifeq ($(i386),1)
      OPENGLLIB += -lglut -L/usr/lib
    endif
    # Neither variable set at all: the concatenation is empty only when both
    # are empty, so this is the same test as the two nested checks it replaces.
    ifeq ($(x86_64)$(i386),)
      OPENGLLIB += -lglut
    endif
  endif
endif

# Optional SDK helper libraries; each is only defined when its USE* switch is 1.
ifeq ($(USEPARAMGL),1)
  PARAMGLLIB := -lparamgl_$(LIB_ARCH)$(LIBSUFFIX)
endif

ifeq ($(USERENDERCHECKGL),1)
  RENDERCHECKGLLIB := -lrendercheckgl_$(LIB_ARCH)$(LIBSUFFIX)
endif

ifeq ($(USECUDPP), 1)
  # Pick the CUDPP flavour in one chain: an explicit x86_64=1 or i386=1 request
  # wins, then the Snow Leopard default, then the host word size.  This used to
  # be two separate conditionals where the second assigned CUDPPLIB on every
  # path, so an x86_64=1 request was silently overwritten.
  ifeq ($(x86_64),1)
    CUDPPLIB := -lcudpp64
  else ifeq ($(i386),1)
    CUDPPLIB := -lcudpp
  else ifneq ($(SNOWLEOPARD),)
    CUDPPLIB := -lcudpp
  else ifeq "$(strip $(HP_64))" ""
    CUDPPLIB := -lcudpp
  else
    CUDPPLIB := -lcudpp64
  endif

  CUDPPLIB := $(CUDPPLIB)$(LIBSUFFIX)

  ifeq ($(emu), 1)
    CUDPPLIB := $(CUDPPLIB)_emu
  endif
endif

ifeq ($(USENVCUVID), 1)
  # Only set on OS X.
  ifneq ($(DARWIN),)
    NVCUVIDLIB := -L../../common/lib/darwin -lnvcuvid
  endif
endif

# Libs
# Library search paths.  OS X always uses $(CUDA_INSTALL_PATH)/lib; other hosts
# choose lib or lib64 from the host word size unless x86_64=1 / i386=1 asks for
# the opposite.
ifneq ($(DARWIN),)
  LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER) $(NVCUVIDLIB)
else
  # ASCII double quotes are required by this form of ifeq.
  ifeq "$(strip $(HP_64))" ""
    ifeq ($(x86_64),1)
      LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
    else
      LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
    endif
  else
    ifeq ($(i386),1)
      LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
    else
      LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
    endif
  endif
endif

# If dynamically linking to CUDA and CUDART, we exclude the libraries from the LIB
# Each branch appends ${LIB} to itself, so the search paths already in LIB
# appear a second time at the end of the link line.
ifeq ($(USECUDADYNLIB),1)
  LIB += ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB} -ldl -rdynamic
else
  # static linking, we will statically link against CUDA and CUDART
  ifeq ($(USEDRVAPI),1)
    LIB += -lcuda   ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
  else
    LIB += -lcudart ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
  endif
endif

# CUFFT: link the emulation build of the library when emu=1.
ifeq ($(USECUFFT),1)
  ifneq ($(emu),1)
    LIB += -lcufft
  else
    LIB += -lcufftemu
  endif
endif

# CUBLAS: same selection as CUFFT.
ifeq ($(USECUBLAS),1)
  ifneq ($(emu),1)
    LIB += -lcublas
  else
    LIB += -lcublasemu
  endif
endif

# Turn every name listed in FRAMEWORKS into a "-framework <name>" pair.
ifneq ($(FRAMEWORKS),)
  LIB += $(addprefix -framework ,$(notdir $(FRAMEWORKS)))
endif

# Lib/exe configuration
# Exactly one of three products is configured: a static library (STATIC_LIB),
# a dynamic library (DYNAMIC_LIB), or otherwise an executable (EXECUTABLE).
# Library names get _$(LIB_ARCH)$(LIBSUFFIX) inserted before the extension.
# patsubst is applied to the file name only, so a ".a" or ".dylib" substring
# inside $(LIBDIR) (for example a ".app" path component) is left untouched.
ifneq ($(STATIC_LIB),)
  TARGETDIR := $(LIBDIR)
  TARGET    := $(LIBDIR)/$(patsubst %.a,%_$(LIB_ARCH)$(LIBSUFFIX).a,$(STATIC_LIB))
  LINKLINE   = ar rucv $(TARGET) $(OBJS)
else ifneq ($(DYNAMIC_LIB),)
  TARGETDIR := $(LIBDIR)
  TARGETLIB := $(patsubst %.dylib,%_$(LIB_ARCH)$(LIBSUFFIX).dylib,$(DYNAMIC_LIB))
  TARGET    := $(LIBDIR)/$(TARGETLIB)
  # The install name places the dylib in the Frameworks directory that sits
  # next to the directory of the loading executable.
  LINKLINE   = $(DYLINK) -dynamiclib -install_name "@executable_path/../Frameworks/$(TARGETLIB)" -o $(TARGET) $(OBJS) $(LIB)
else
  ifneq ($(OMIT_CUTIL_LIB),1)
    LIB += -lcutil_$(LIB_ARCH)$(LIBSUFFIX)
  endif

  # Device emulation configuration (only reached when building an executable)
  ifeq ($(emu), 1)
    NVCCFLAGS   += -deviceemu
    CUDACCFLAGS +=
    BINSUBDIR   := emu$(BINSUBDIR)
    # consistency, makes developing easier
    CXXFLAGS    += -D__DEVICE_EMULATION__
    CFLAGS      += -D__DEVICE_EMULATION__
  endif

  TARGETDIR := $(BINDIR)/$(BINSUBDIR)
  TARGET    := $(TARGETDIR)/$(EXECUTABLE)
  LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
endif

# check if verbose
# VERBOSE prefixes every recipe line: empty echoes the command, "@" hides it.
ifeq ($(verbose), 1)
  VERBOSE :=
else
  VERBOSE := @
endif

################################################################################
# Check for input flags and set compiler flags appropriately
################################################################################
ifeq ($(fastmath), 1)
  NVCCFLAGS += -use_fast_math
endif

ifeq ($(keep), 1)
  NVCCFLAGS       += -keep
  # Intermediate files left behind by -keep; removed again by the clean target.
  NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif

ifdef maxregisters
  NVCCFLAGS += -maxrregcount $(maxregisters)
endif

# Add cudacc flags
NVCCFLAGS += $(CUDACCFLAGS)

# Add common flags
NVCCFLAGS += $(COMMONFLAGS)
CXXFLAGS  += $(COMMONFLAGS)
CFLAGS    += $(COMMONFLAGS)

# Optionally forward the host warning flags through nvcc, one
# --compiler-options per flag.
ifeq ($(nvcc_warn_verbose),1)
  NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif

################################################################################
# Set up object files
################################################################################
# Every source list maps to $(OBJDIR)/<file>.<ext>.o; directory parts of the
# source names are dropped by $(notdir).
OBJDIR := $(ROOTOBJDIR)/$(LIB_ARCH)/$(BINSUBDIR)
OBJS += $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(notdir $(CCFILES)))
OBJS += $(patsubst %.c,$(OBJDIR)/%.c.o,$(notdir $(CFILES)))
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(notdir $(CUFILES)))
OBJS += $(patsubst %.m,$(OBJDIR)/%.m.o,$(notdir $(MFILES)))
OBJS += $(patsubst %.mm,$(OBJDIR)/%.mm.o,$(notdir $(MMFILES)))

################################################################################
# Set up cubin output files
################################################################################
# SRCDIR is concatenated directly, so it is expected to end in "/".
CUBINDIR := $(SRCDIR)data
CUBINS += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))

################################################################################
# Set up PTX output files
################################################################################
PTXDIR := $(SRCDIR)data
PTXBINS += $(patsubst %.cu,$(PTXDIR)/%.ptx,$(notdir $(PTXFILES)))

################################################################################
# Rules
################################################################################
# Recipe lines must begin with a hard TAB.
# Each object rule creates its own output directory first, so compilation does
# not depend on another target having run earlier (safe under `make -j`).

$(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<

$(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<

# Objective-C is built with the C driver to match CFLAGS (this rule previously
# paired $(CXX) with CFLAGS).
$(OBJDIR)/%.m.o : $(SRCDIR)%.m $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<

$(OBJDIR)/%.mm.o : $(SRCDIR)%.mm $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<

$(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<

# The directory targets are order-only prerequisites (after "|"): they run
# before the compile but, not being files, no longer force a rebuild of every
# cubin/ptx on each invocation.
$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<

$(PTXDIR)/%.ptx : $(SRCDIR)%.cu | ptxdirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $<

#
# The following definition is a template that gets instantiated for each SM
# version (sm_10, sm_13, etc.) stored in SM_VERSIONS.  It does 2 things:
# 1. It adds to OBJS a .cu_sm_XX.o for each .cu file it finds in CUFILES_sm_XX.
# 2. It generates a rule for building .cu_sm_XX.o files from the corresponding
#    .cu file.
#
# The intended use for this is to allow Makefiles that use common.mk to compile
# files to different Compute Capability targets (aka SM arch version).  To do
# so, in the Makefile, list files for each SM arch separately, like so:
#
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
# CUFILES_sm_12 := anothercudakernel_sm12.cu
#
# Inside the template, $(1) is the SM version.  $$@, $$< and $$(@D) are escaped
# so they survive $(call) and are expanded only when the generated recipe runs.
# The recipe lines start with a hard TAB.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_$(1))))
$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)mkdir -p $$(@D)
	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
endef

# This line invokes the above template for each arch version stored in
# SM_VERSIONS.  The call function invokes the template, and the eval
# function interprets it as make commands.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))

# Final link step.  This must stay the first ordinary target in the file so it
# remains the default goal.  makedirectories is an order-only prerequisite
# (after "|"): it is not a file, so as a normal prerequisite it made the target
# look out of date and forced a relink on every run.
$(TARGET): $(OBJS) $(CUBINS) $(PTXBINS) Makefile | makedirectories
	$(VERBOSE)$(LINKLINE)

# Objects need the output directories to exist before they are compiled; this
# adds the ordering without adding a rebuild trigger.
$(OBJS): | makedirectories

# None of these names is a file.
.PHONY: cubindirectory ptxdirectory makedirectories

cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)

ptxdirectory:
	$(VERBOSE)mkdir -p $(PTXDIR)

makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)

# Housekeeping targets; none of them produces a file of that name.
.PHONY: tidy clean clobber

# Remove editor backup files: anything whose path contains '#' or '~'.
# find deletes the files itself, so names containing whitespace are handled
# and directories are skipped instead of making rm fail.
tidy :
	$(VERBOSE)find . ! -type d \( -path '*#*' -o -path '*~*' \) -exec rm -f {} +

# Remove everything this makefile builds, plus image/binary dumps left in the
# run directory.
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(PTXBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.ppm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.pgm
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bin
	$(VERBOSE)rm -f $(ROOTBINDIR)/$(OSLOWER)/$(BINSUBDIR)/*.bmp

# clean, then delete the whole object tree.
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)

[/codebox]

Nice work. However, I recommend changing

COMMONDIR := $(ROOTDIR)/common
to
COMMONDIR ?= $(ROOTDIR)/common

in common.mk

Otherwise the COMMONDIR refers to ./common in your current directory.